Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 6e0010bc

Historique | Voir | Annoter | Télécharger (87,08 ko)

1 55c1cdff Florent Chuffart
# get_content = function(# Get content from cached
2 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
4 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
5 6b5a528c Florent Chuffart
# ) {
6 6b5a528c Florent Chuffart
#   UseMethod("get_content", obj)
7 55c1cdff Florent Chuffart
#   ### Returns the cached content of the current object.
8 6b5a528c Florent Chuffart
# }
9 55c1cdff Florent Chuffart
#
10 55c1cdff Florent Chuffart
# get_content.default = structure( function(# Get content from cached
11 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
13 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
14 6b5a528c Florent Chuffart
# ) {
15 6b5a528c Florent Chuffart
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16 6b5a528c Florent Chuffart
#     NM_CACHE <<- list()
17 6b5a528c Florent Chuffart
#   }
18 6b5a528c Florent Chuffart
#   if (is.null(NM_CACHE[[obj$filename]])) {
19 6b5a528c Florent Chuffart
#     print(paste("Loading file ",obj$filename, sep=""))
20 6b5a528c Florent Chuffart
#     tmp_content = my_read(obj, ...)
21 6b5a528c Florent Chuffart
#     print("affect it...")
22 6b5a528c Florent Chuffart
#     NM_CACHE[[obj$filename]] <<- tmp_content
23 6b5a528c Florent Chuffart
#     print("done.")
24 55c1cdff Florent Chuffart
#   }
25 6b5a528c Florent Chuffart
#   return(NM_CACHE[[obj$filename]])
26 55c1cdff Florent Chuffart
# ### Returns the cached content of the current object.
27 6b5a528c Florent Chuffart
# }, ex=function(){
28 6b5a528c Florent Chuffart
#   # Create a dataframe
29 6b5a528c Florent Chuffart
#   df = NULL
30 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32 6b5a528c Florent Chuffart
#   # Dump it into tmp file
33 6b5a528c Florent Chuffart
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34 6b5a528c Florent Chuffart
#   # Load it into cache using feature.
35 6b5a528c Florent Chuffart
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36 55c1cdff Florent Chuffart
#   class(cached_content_object) = "table"
37 6b5a528c Florent Chuffart
#   # First time it will be load into cache
38 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
39 6b5a528c Florent Chuffart
#   # Second time not
40 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
41 55c1cdff Florent Chuffart
#
42 6b5a528c Florent Chuffart
# })
43 55c1cdff Florent Chuffart
#
44 6b5a528c Florent Chuffart
# my_read = function(
45 55c1cdff Florent Chuffart
# ### Abstract my_read function.
46 6b5a528c Florent Chuffart
#   obj, ...) {
47 6b5a528c Florent Chuffart
#   UseMethod("my_read", obj)
48 6b5a528c Florent Chuffart
# }
49 55c1cdff Florent Chuffart
#
50 6b5a528c Florent Chuffart
# my_read.default = function(
51 55c1cdff Florent Chuffart
# ### Default my_read function.
52 6b5a528c Florent Chuffart
#   obj, ...){
53 55c1cdff Florent Chuffart
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
54 6b5a528c Florent Chuffart
# }
55 55c1cdff Florent Chuffart
#
56 6b5a528c Florent Chuffart
# my_read.fasta = function(
57 55c1cdff Florent Chuffart
# ### my_read function for fasta files.
58 6b5a528c Florent Chuffart
#   obj, ...){
59 6b5a528c Florent Chuffart
#   # require(seqinr)
60 6b5a528c Florent Chuffart
#   return(read.fasta(obj$filename, ...))
61 6b5a528c Florent Chuffart
# }
62 55c1cdff Florent Chuffart
#
63 6b5a528c Florent Chuffart
# my_read.table = function(
64 55c1cdff Florent Chuffart
# ### my_read function for table files.
65 6b5a528c Florent Chuffart
#   obj, ...){
66 6b5a528c Florent Chuffart
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67 55c1cdff Florent Chuffart
#     return(read.table(file=gzfile(obj$filename), ...))
68 55c1cdff Florent Chuffart
#   } else {
69 6b5a528c Florent Chuffart
#     return(read.table(file=obj$filename, ...))
70 6b5a528c Florent Chuffart
#   }
71 55c1cdff Florent Chuffart
# }
72 55c1cdff Florent Chuffart
#
73 6b5a528c Florent Chuffart
# my_read.cvs = function(
74 55c1cdff Florent Chuffart
# ### my_read function for cvs files.
75 6b5a528c Florent Chuffart
#   obj, ...){
76 6b5a528c Florent Chuffart
#   return(read.csv(file=obj$filename, ...))
77 55c1cdff Florent Chuffart
# }
78 6b5a528c Florent Chuffart
79 6b5a528c Florent Chuffart
80 6b5a528c Florent Chuffart
81 6b5a528c Florent Chuffart
82 6b5a528c Florent Chuffart
83 6b5a528c Florent Chuffart
84 6b5a528c Florent Chuffart
85 6b5a528c Florent Chuffart
86 6b5a528c Florent Chuffart
87 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
88 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89 55c1cdff Florent Chuffart
##author<< Gael Yvert,
90 6b5a528c Florent Chuffart
(
91 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
92 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
93 6b5a528c Florent Chuffart
) {
94 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
95 6b5a528c Florent Chuffart
  N = length(x)
96 6b5a528c Florent Chuffart
  i = 1;
97 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98 6b5a528c Florent Chuffart
  if (i == 1)
99 6b5a528c Florent Chuffart
    return (NA)
100 6b5a528c Florent Chuffart
  else
101 6b5a528c Florent Chuffart
    return( x[i-1] )
102 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
103 6b5a528c Florent Chuffart
}, ex=function(){
104 6b5a528c Florent Chuffart
  print("example")
105 6b5a528c Florent Chuffart
})
106 6b5a528c Florent Chuffart
107 6b5a528c Florent Chuffart
108 6b5a528c Florent Chuffart
109 6b5a528c Florent Chuffart
lod_score_vecs = structure(function # Likelihood ratio
110 6b5a528c Florent Chuffart
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111 6b5a528c Florent Chuffart
(
112 6b5a528c Florent Chuffart
x ,##<< First vector.
113 6b5a528c Florent Chuffart
y ##<< Second vector.
114 6b5a528c Florent Chuffart
) {
115 6b5a528c Florent Chuffart
	if (length(x) <=1 | length(y) <= 1) {
116 6b5a528c Florent Chuffart
		return(NA)
117 6b5a528c Florent Chuffart
	}
118 6b5a528c Florent Chuffart
  meanX = mean(x)
119 6b5a528c Florent Chuffart
  sdX = sd(x)
120 6b5a528c Florent Chuffart
  meanY = mean(y)
121 6b5a528c Florent Chuffart
  sdY = sd(y)
122 6b5a528c Florent Chuffart
  meanXY = mean(c(x,y))
123 6b5a528c Florent Chuffart
  sdXY = sd(c(x,y))
124 6b5a528c Florent Chuffart
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125 6b5a528c Florent Chuffart
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126 6b5a528c Florent Chuffart
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127 729c934e Florent Chuffart
  ratio = llX + llY - llXY
128 6b5a528c Florent Chuffart
  return(ratio)
129 6b5a528c Florent Chuffart
### Returns the likelihood ratio.
130 6b5a528c Florent Chuffart
}, ex=function(){
131 6b5a528c Florent Chuffart
  # LOD score for 2 set of values
132 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
133 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
134 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
135 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
136 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
137 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
138 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142 6b5a528c Florent Chuffart
  lod_score_vecs(x1,x2)
143 6b5a528c Florent Chuffart
 })
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
146 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
148 6b5a528c Florent Chuffart
		l ##<<  A list
149 6b5a528c Florent Chuffart
	) {
150 6b5a528c Florent Chuffart
  if (is.null(df)) {
151 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
152 6b5a528c Florent Chuffart
  } else {
153 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154 6b5a528c Florent Chuffart
  }
155 6b5a528c Florent Chuffart
  return(df)
156 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
157 6b5a528c Florent Chuffart
}, ex=function(){
158 6b5a528c Florent Chuffart
		## Here dataframe is NULL
159 6b5a528c Florent Chuffart
		print(df)
160 6b5a528c Florent Chuffart
		df = NULL
161 5bfac5a3 Florent Chuffart
162 6b5a528c Florent Chuffart
		# Initialize df
163 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164 6b5a528c Florent Chuffart
		print(df)
165 5bfac5a3 Florent Chuffart
166 6b5a528c Florent Chuffart
		# Adding elements to df
167 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168 6b5a528c Florent Chuffart
		print(df)
169 6b5a528c Florent Chuffart
})
170 6b5a528c Florent Chuffart
171 6b5a528c Florent Chuffart
172 6b5a528c Florent Chuffart
sign_from_strand = function(
173 55c1cdff Florent Chuffart
### Get the sign of strand
174 6b5a528c Florent Chuffart
strands) {
175 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
177 6b5a528c Florent Chuffart
}
178 6b5a528c Florent Chuffart
179 6b5a528c Florent Chuffart
flat_reads = function(
180 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
181 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
182 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
183 6b5a528c Florent Chuffart
) {
184 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189 6b5a528c Florent Chuffart
}
190 6b5a528c Florent Chuffart
191 6b5a528c Florent Chuffart
192 6b5a528c Florent Chuffart
193 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
194 6b5a528c Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
196 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
197 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
198 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
199 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
200 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
201 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
202 6b5a528c Florent Chuffart
) {
203 6b5a528c Florent Chuffart
  if (x_min < 0) {
204 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
205 6b5a528c Florent Chuffart
	} else {
206 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
207 6b5a528c Florent Chuffart
  }
208 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210 55c1cdff Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]
211 55c1cdff Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]
212 6b5a528c Florent Chuffart
  i = 1
213 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
214 55c1cdff Florent Chuffart
    lb = tf_outputs[i,]$low
215 6b5a528c Florent Chuffart
    ub = tf_outputs[i,]$up
216 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217 6b5a528c Florent Chuffart
    i = i+1
218 6b5a528c Florent Chuffart
  }
219 6b5a528c Florent Chuffart
  return(tf_outputs)
220 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
221 6b5a528c Florent Chuffart
}
222 6b5a528c Florent Chuffart
223 6b5a528c Florent Chuffart
224 6b5a528c Florent Chuffart
225 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
226 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
228 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
229 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
230 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
231 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
232 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
233 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
234 b8a95426 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
235 6b5a528c Florent Chuffart
) {
236 6b5a528c Florent Chuffart
	if (only_f) {
237 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
238 6b5a528c Florent Chuffart
	} else if (only_r) {
239 5bfac5a3 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
240 6b5a528c Florent Chuffart
	} else {
241 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
242 6b5a528c Florent Chuffart
	}
243 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
244 b8a95426 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
245 55c1cdff Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
246 b8a95426 Florent Chuffart
  }
247 6b5a528c Florent Chuffart
	return(inputs)
248 6b5a528c Florent Chuffart
### Returns filtred inputs.
249 6b5a528c Florent Chuffart
}
250 6b5a528c Florent Chuffart
251 6b5a528c Florent Chuffart
252 6b5a528c Florent Chuffart
253 6b5a528c Florent Chuffart
get_comp_strand = function(
254 6b5a528c Florent Chuffart
### Compute the complementatry strand.
255 6b5a528c Florent Chuffart
strand ##<< The original strand.
256 6b5a528c Florent Chuffart
) {
257 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
258 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
259 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
260 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
261 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
262 6b5a528c Florent Chuffart
	})
263 6b5a528c Florent Chuffart
### Returns the complementatry strand.
264 6b5a528c Florent Chuffart
}
265 6b5a528c Florent Chuffart
266 6b5a528c Florent Chuffart
267 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
268 6b5a528c Florent Chuffart
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270 729c934e Florent Chuffart
lod_thres=20, ##<< Log likelihood ration threshold.
271 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272 6b5a528c Florent Chuffart
){
273 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
274 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
275 6b5a528c Florent Chuffart
			return(TRUE)
276 6b5a528c Florent Chuffart
		}
277 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
278 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
279 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
280 6b5a528c Florent Chuffart
	        return(FALSE)
281 6b5a528c Florent Chuffart
	      }
282 6b5a528c Florent Chuffart
	  	}
283 6b5a528c Florent Chuffart
	  }
284 6b5a528c Florent Chuffart
	  return(TRUE)
285 6b5a528c Florent Chuffart
	}
286 6b5a528c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
287 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
288 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
289 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
290 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
291 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
292 6b5a528c Florent Chuffart
				# print(new_cluster)
293 6b5a528c Florent Chuffart
		  }
294 6b5a528c Florent Chuffart
		} else {
295 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
296 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
297 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
298 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
299 6b5a528c Florent Chuffart
			}
300 6b5a528c Florent Chuffart
		}
301 6b5a528c Florent Chuffart
		return(clusters)
302 6b5a528c Florent Chuffart
	}
303 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
304 6b5a528c Florent Chuffart
	lod_scores = c()
305 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
306 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
307 6b5a528c Florent Chuffart
  # compute clusters
308 6b5a528c Florent Chuffart
  clusters = list()
309 6b5a528c Florent Chuffart
  cluster_contents = list()
310 6b5a528c Florent Chuffart
  # Init reader
311 6b5a528c Florent Chuffart
  indexes = c()
312 6b5a528c Florent Chuffart
  track_readers = c()
313 6b5a528c Florent Chuffart
  current_nuc = NULL
314 55c1cdff Florent Chuffart
	lod_score = lod_thres + 1
315 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
316 6b5a528c Florent Chuffart
  tf_outs = list()
317 6b5a528c Florent Chuffart
	i = 1
318 6b5a528c Florent Chuffart
  for (sample in samples) {
319 6b5a528c Florent Chuffart
		# print(sample$roi$chr)
320 6b5a528c Florent Chuffart
		# print(min_nuc_center)
321 6b5a528c Florent Chuffart
		# print(max_nuc_center)
322 6b5a528c Florent Chuffart
		# print(sample$outputs)
323 6b5a528c Florent Chuffart
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
324 6b5a528c Florent Chuffart
		# print(tf_outs[[i]])
325 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
326 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
327 6b5a528c Florent Chuffart
    indexes[i] = 1
328 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
329 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
330 6b5a528c Florent Chuffart
	  } else {
331 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
332 6b5a528c Florent Chuffart
		}
333 5bfac5a3 Florent Chuffart
		i = i+1
334 6b5a528c Florent Chuffart
  }
335 6b5a528c Florent Chuffart
	# print(track_readers)
336 6b5a528c Florent Chuffart
  new_cluster = NULL
337 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
338 6b5a528c Florent Chuffart
  nuc_from_track = c()
339 6b5a528c Florent Chuffart
  for (i in 1:length(tf_outs)){
340 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
341 6b5a528c Florent Chuffart
  }
342 6b5a528c Florent Chuffart
  # Start clustering
343 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
344 6b5a528c Florent Chuffart
    new_center = min(track_readers)
345 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
346 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
347 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
348 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
349 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
350 6b5a528c Florent Chuffart
		new_nuc$track = current_track
351 5bfac5a3 Florent Chuffart
352 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
353 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
354 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
355 6b5a528c Florent Chuffart
356 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
357 6b5a528c Florent Chuffart
358 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
359 6b5a528c Florent Chuffart
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
360 6b5a528c Florent Chuffart
			lod_scores = c(lod_scores,lod_score)
361 6b5a528c Florent Chuffart
		}
362 6b5a528c Florent Chuffart
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363 6b5a528c Florent Chuffart
		if (is.na(lod_score)) {
364 55c1cdff Florent Chuffart
			lod_score = lod_thres + 1
365 6b5a528c Florent Chuffart
		}
366 729c934e Florent Chuffart
		# Store lod_score
367 5bfac5a3 Florent Chuffart
		new_nuc$lod_score = lod_score
368 729c934e Florent Chuffart
	  if (lod_score < lod_thres) {
369 6b5a528c Florent Chuffart
      # aggregate to current cluster
370 6b5a528c Florent Chuffart
      #   update bound
371 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
372 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
373 6b5a528c Florent Chuffart
      }
374 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
375 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
376 6b5a528c Florent Chuffart
      }
377 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
378 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
379 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
380 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
381 6b5a528c Florent Chuffart
    } else {
382 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
383 6b5a528c Florent Chuffart
        # store old cluster
384 6b5a528c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
385 6b5a528c Florent Chuffart
			}
386 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
387 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
388 6b5a528c Florent Chuffart
      nuc_from_track = c()
389 6b5a528c Florent Chuffart
      for (i in 1:length(tf_outs)){
390 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
391 6b5a528c Florent Chuffart
      }
392 55c1cdff Florent Chuffart
      # create new cluster
393 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
394 6b5a528c Florent Chuffart
      # update upper bound
395 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
396 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
397 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
398 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
399 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
400 5bfac5a3 Florent Chuffart
401 6b5a528c Florent Chuffart
		}
402 5bfac5a3 Florent Chuffart
403 6b5a528c Florent Chuffart
		current_nuc = new_nuc
404 6b5a528c Florent Chuffart
405 6b5a528c Florent Chuffart
    # update indexes
406 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
407 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
408 6b5a528c Florent Chuffart
      # update track
409 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
410 6b5a528c Florent Chuffart
    } else {
411 6b5a528c Florent Chuffart
      # update track
412 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
413 6b5a528c Florent Chuffart
    }
414 6b5a528c Florent Chuffart
  }
415 6b5a528c Florent Chuffart
  # store last cluster
416 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
417 6b5a528c Florent Chuffart
    # store old cluster
418 6b5a528c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
419 6b5a528c Florent Chuffart
  }
420 6b5a528c Florent Chuffart
	return(list(clusters, lod_scores))
421 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
422 5bfac5a3 Florent Chuffart
}, ex=function(){
423 6b5a528c Florent Chuffart
	# Dealing with a region of interest
424 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
425 6b5a528c Florent Chuffart
	samples = list()
426 6b5a528c Florent Chuffart
	for (i in 1:3) {
427 6b5a528c Florent Chuffart
		# Create TF output
428 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
429 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
430 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
431 6b5a528c Florent Chuffart
		# Generate corresponding reads
432 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
433 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
434 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
435 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
436 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
437 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
438 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
439 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
440 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
441 55c1cdff Florent Chuffart
														 "V3" = strands,
442 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
443 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
444 6b5a528c Florent Chuffart
	}
445 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
446 6b5a528c Florent Chuffart
})
447 6b5a528c Florent Chuffart
448 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
449 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
450 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
451 4016229d Florent Chuffart
cur_index ##<< the index of the roi involved
452 7646593d Florent Chuffart
) {
453 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
454 4016229d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
455 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
456 7646593d Florent Chuffart
	} else {
457 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
458 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
459 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
460 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
461 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
462 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
463 4016229d Florent Chuffart
			tmp_nuc_as_list[["cur_index"]] = cur_index
464 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
465 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
466 7646593d Florent Chuffart
			all_original_reads = c()
467 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
468 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
469 7646593d Florent Chuffart
			}
470 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
471 0d5fbf02 Florent Chuffart
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
472 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
473 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
474 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
475 7646593d Florent Chuffart
			} else {
476 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = NA
477 5bfac5a3 Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = NA
478 7646593d Florent Chuffart
			}
479 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
480 7646593d Florent Chuffart
    })
481 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
482 7646593d Florent Chuffart
	}
483 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
484 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
485 7646593d Florent Chuffart
}
486 6b5a528c Florent Chuffart
487 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
488 6b5a528c Florent Chuffart
### This function aligns nucs between two strains for a given genome region.
489 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
490 6b5a528c Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
491 6b5a528c Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
492 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
493 729c934e Florent Chuffart
lod_thres=100, ##<< LOD cut off.
494 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
495 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
496 6b5a528c Florent Chuffart
) {
497 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
498 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
499 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
500 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
501 6b5a528c Florent Chuffart
	}
502 6b5a528c Florent Chuffart
	common_nuc = NULL
503 6b5a528c Florent Chuffart
	lod_scores = c()
504 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
505 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
506 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
507 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
508 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
509 4016229d Florent Chuffart
	big_cur = replicates[[1]][[1]]$roi
510 4016229d Florent Chuffart
  orig_big_cur = replicates[[1]][[1]]$orig_roi
511 4016229d Florent Chuffart
	if(big_cur$end - big_cur$begin < 0) {
512 4016229d Florent Chuffart
		tmp_begin = big_cur$begin
513 4016229d Florent Chuffart
		big_cur$begin =  big_cur$end
514 4016229d Florent Chuffart
		big_cur$end =  tmp_begin
515 6b5a528c Florent Chuffart
	}
516 6b5a528c Florent Chuffart
	# GO!
517 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
518 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
519 6b5a528c Florent Chuffart
	}
520 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
521 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
522 6b5a528c Florent Chuffart
  }
523 6b5a528c Florent Chuffart
	lws = c()
524 6b5a528c Florent Chuffart
	ups = c()
525 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
526 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
527 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
528 6b5a528c Florent Chuffart
	}
529 6b5a528c Florent Chuffart
530 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
531 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
532 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
533 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
534 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
535 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
536 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
537 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
538 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
539 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
540 4016229d Florent Chuffart
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
541 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
542 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
543 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
544 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
545 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
546 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
547 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
548 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
549 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
550 4016229d Florent Chuffart
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
551 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
552 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
553 6b5a528c Florent Chuffart
							SKIP = FALSE
554 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
555 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
556 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
557 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
558 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
559 6b5a528c Florent Chuffart
									SKIP = TRUE
560 6b5a528c Florent Chuffart
								}
561 6b5a528c Florent Chuffart
							}
562 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
563 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
564 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
565 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
566 6b5a528c Florent Chuffart
									SKIP = TRUE
567 6b5a528c Florent Chuffart
								}
568 6b5a528c Florent Chuffart
							}
569 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
570 6b5a528c Florent Chuffart
							if (!SKIP) {
571 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
572 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
573 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
574 6b5a528c Florent Chuffart
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
575 6b5a528c Florent Chuffart
								lod_scores = c(lod_scores, lod_score)
576 5bfac5a3 Florent Chuffart
								# Filtering on LOD Score
577 729c934e Florent Chuffart
								if (lod_score < lod_thres) {
578 6b5a528c Florent Chuffart
									tmp_nuc = list()
579 6b5a528c Florent Chuffart
									# strain_ref1
580 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
581 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
582 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
583 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
584 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
585 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
586 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
587 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
588 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
589 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
590 6b5a528c Florent Chuffart
									# strain_ref2
591 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
592 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
593 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
594 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
595 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
596 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
597 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
598 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
599 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
600 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
601 6b5a528c Florent Chuffart
									# common
602 6b5a528c Florent Chuffart
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
603 6b5a528c Florent Chuffart
									# print(tmp_nuc)
604 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
605 6b5a528c Florent Chuffart
								}
606 6b5a528c Florent Chuffart
							}
607 6b5a528c Florent Chuffart
						}
608 5bfac5a3 Florent Chuffart
					}
609 6b5a528c Florent Chuffart
				} else {
610 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
611 6b5a528c Florent Chuffart
			  }
612 6b5a528c Florent Chuffart
		  }
613 6b5a528c Florent Chuffart
		}
614 5bfac5a3 Florent Chuffart
615 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
616 6b5a528c Florent Chuffart
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
617 5bfac5a3 Florent Chuffart
			to_remove_list = c()
618 5bfac5a3 Florent Chuffart
			for (i in 1:length(index_redundant)) {
619 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
620 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
621 6b5a528c Florent Chuffart
				}	 else {
622 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
623 5bfac5a3 Florent Chuffart
			  }
624 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
625 6b5a528c Florent Chuffart
			}
626 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
627 6b5a528c Florent Chuffart
		}
628 5bfac5a3 Florent Chuffart
629 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
630 8c87ee64 Florent Chuffart
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
631 5bfac5a3 Florent Chuffart
			to_remove_list = c()
632 5bfac5a3 Florent Chuffart
			for (i in 1:length(index_redundant)) {
633 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
634 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
635 6b5a528c Florent Chuffart
				}	 else {
636 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
637 5bfac5a3 Florent Chuffart
			  }
638 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
639 6b5a528c Florent Chuffart
			}
640 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
641 6b5a528c Florent Chuffart
		}
642 5bfac5a3 Florent Chuffart
643 6b5a528c Florent Chuffart
		return(list(common_nuc, lod_scores))
644 6b5a528c Florent Chuffart
	} else {
645 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
646 6b5a528c Florent Chuffart
		return(NULL)
647 6b5a528c Florent Chuffart
	}
648 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
649 5bfac5a3 Florent Chuffart
}, ex=function(){
650 1d833b97 Florent Chuffart
651 4016229d Florent Chuffart
    # Define new translate_cur function...
652 4016229d Florent Chuffart
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
653 1d833b97 Florent Chuffart
      return(roi)
654 1d833b97 Florent Chuffart
    }
655 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
656 4016229d Florent Chuffart
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
657 4016229d Florent Chuffart
    unlockBinding("translate_cur", getNamespace("nucleominer"))
658 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, "package:nucleominer")
659 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
660 4016229d Florent Chuffart
    lockBinding("translate_cur", getNamespace("nucleominer"))
661 4016229d Florent Chuffart
    lockBinding("translate_cur", as.environment("package:nucleominer"))
662 1d833b97 Florent Chuffart
663 6b5a528c Florent Chuffart
	# Dealing with a region of interest
664 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
665 4016229d Florent Chuffart
	roi2 = translate_cur(roi, roi$strain_ref1)
666 6b5a528c Florent Chuffart
	replicates = list()
667 6b5a528c Florent Chuffart
	for (j in 1:2) {
668 6b5a528c Florent Chuffart
		samples = list()
669 6b5a528c Florent Chuffart
		for (i in 1:3) {
670 6b5a528c Florent Chuffart
			# Create TF output
671 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
672 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
673 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
674 6b5a528c Florent Chuffart
			# Generate corresponding reads
675 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
676 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
677 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
678 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
679 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
680 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
681 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
682 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
683 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
684 55c1cdff Florent Chuffart
															 "V3" = strands,
685 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
686 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
687 6b5a528c Florent Chuffart
		}
688 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
689 6b5a528c Florent Chuffart
	}
690 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
691 6b5a528c Florent Chuffart
})
692 6b5a528c Florent Chuffart
693 6b5a528c Florent Chuffart
694 6b5a528c Florent Chuffart
695 6b5a528c Florent Chuffart
696 6b5a528c Florent Chuffart
697 6b5a528c Florent Chuffart
698 6b5a528c Florent Chuffart
699 6b5a528c Florent Chuffart
700 6b5a528c Florent Chuffart
701 6b5a528c Florent Chuffart
702 6b5a528c Florent Chuffart
703 6b5a528c Florent Chuffart
704 6b5a528c Florent Chuffart
705 6b5a528c Florent Chuffart
706 6b5a528c Florent Chuffart
707 6b5a528c Florent Chuffart
708 6b5a528c Florent Chuffart
709 6b5a528c Florent Chuffart
710 6b5a528c Florent Chuffart
711 6b5a528c Florent Chuffart
712 6b5a528c Florent Chuffart
713 6b5a528c Florent Chuffart
714 6b5a528c Florent Chuffart
715 6b5a528c Florent Chuffart
716 6b5a528c Florent Chuffart
717 6b5a528c Florent Chuffart
718 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
719 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
720 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
721 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
722 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
723 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
724 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
725 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
726 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
727 6b5a528c Florent Chuffart
) {
728 6b5a528c Florent Chuffart
	samples=list()
729 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
730 6b5a528c Florent Chuffart
	for (i in samples_ids) {
731 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
732 a0b91fee Florent Chuffart
    sample$orig_roi = roi
733 4016229d Florent Chuffart
    sample$roi = translate_cur(roi, sample$strain, config = config)
734 6b5a528c Florent Chuffart
		if (get_genome) {
735 6b5a528c Florent Chuffart
			# Get Genome
736 5bfac5a3 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
737 6b5a528c Florent Chuffart
		}
738 6b5a528c Florent Chuffart
		# Get inputs
739 55c1cdff Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
740 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
741 6b5a528c Florent Chuffart
		if (!only_fetch) {
742 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
743 6b5a528c Florent Chuffart
	  }
744 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
745 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
746 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
747 6b5a528c Florent Chuffart
			if (!only_fetch) {
748 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
749 6b5a528c Florent Chuffart
  		}
750 6b5a528c Florent Chuffart
		}
751 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
752 6b5a528c Florent Chuffart
	}
753 6b5a528c Florent Chuffart
  return(samples)
754 6b5a528c Florent Chuffart
}
755 6b5a528c Florent Chuffart
756 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
757 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
758 55c1cdff Florent Chuffart
region1, ##<< Original regions.
759 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
760 6b5a528c Florent Chuffart
) {
761 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
762 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
763 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
764 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
765 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
766 6b5a528c Florent Chuffart
      ret = c()
767 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
768 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
769 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
770 6b5a528c Florent Chuffart
          # remove cur_fuzzy
771 6b5a528c Florent Chuffart
          ret = c()
772 6b5a528c Florent Chuffart
          break
773 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
774 55c1cdff Florent Chuffart
          # crop fuzzy
775 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
776 6b5a528c Florent Chuffart
          ret = cur_fuzzy
777 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
778 55c1cdff Florent Chuffart
          # crop fuzzy
779 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
780 6b5a528c Florent Chuffart
          ret = cur_fuzzy
781 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
782 6b5a528c Florent Chuffart
          # split fuzzy
783 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
784 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
785 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
786 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
787 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
788 6b5a528c Florent Chuffart
          # print(ret)
789 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
790 6b5a528c Florent Chuffart
          break
791 6b5a528c Florent Chuffart
        } else {
792 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
793 55c1cdff Florent Chuffart
        }
794 6b5a528c Florent Chuffart
      }
795 6b5a528c Florent Chuffart
      return(ret)
796 6b5a528c Florent Chuffart
    } else {
797 6b5a528c Florent Chuffart
      return(cur_fuzzy)
798 6b5a528c Florent Chuffart
    }
799 6b5a528c Florent Chuffart
  })
800 6b5a528c Florent Chuffart
  }
801 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
802 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
803 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
804 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
805 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
806 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
807 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
808 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
809 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
810 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
811 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
812 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
813 6b5a528c Florent Chuffart
}
814 6b5a528c Florent Chuffart
815 6b5a528c Florent Chuffart
union_regions = function(# Aggregate regions that intersect themnselves.
816 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
817 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
818 6b5a528c Florent Chuffart
) {
819 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
820 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
821 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
822 6b5a528c Florent Chuffart
  new_length = 0
823 55c1cdff Florent Chuffart
824 6b5a528c Florent Chuffart
  while (old_length != new_length) {
825 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
826 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
827 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
828 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
829 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
830 55c1cdff Florent Chuffart
    }
831 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
832 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
833 6b5a528c Florent Chuffart
    }
834 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
835 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
836 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
837 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
838 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
839 4016229d Florent Chuffart
      cur_index = regions$cur_index[vec_beg_1[i]]
840 4016229d Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
841 6b5a528c Florent Chuffart
      })
842 59ad95ca Florent Chuffart
    union = collapse_regions(union)
843 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
844 6b5a528c Florent Chuffart
    new_length = length(union[,1])
845 6b5a528c Florent Chuffart
    regions = union
846 6b5a528c Florent Chuffart
  }
847 6b5a528c Florent Chuffart
  return(union)
848 6b5a528c Florent Chuffart
}
849 6b5a528c Florent Chuffart
850 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
851 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
852 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
853 4016229d Florent Chuffart
# cur_index, ##<< The region of interest index.
854 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
855 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
856 0303dbe8 Florent Chuffart
# ){
857 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
858 4016229d Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
859 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
860 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
861 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
862 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
863 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
864 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
865 0303dbe8 Florent Chuffart
# }
866 6b5a528c Florent Chuffart
867 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
868 4016229d Florent Chuffart
### This function is an eloborated call to translate_cur.
869 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
870 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
871 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
872 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
873 6b5a528c Florent Chuffart
roi ##<< The region of interest.
874 6b5a528c Florent Chuffart
) {
875 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
876 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
877 4016229d Florent Chuffart
    big_cur =  roi
878 4016229d Florent Chuffart
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
879 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
880 5bfac5a3 Florent Chuffart
      return(NULL)
881 5bfac5a3 Florent Chuffart
    } else {
882 4016229d Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
883 5bfac5a3 Florent Chuffart
    }
884 5bfac5a3 Florent Chuffart
  })
885 5bfac5a3 Florent Chuffart
886 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
887 59ad95ca Florent Chuffart
}
888 59ad95ca Florent Chuffart
889 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
890 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
891 59ad95ca Florent Chuffart
regions ##< a list of regions
892 59ad95ca Florent Chuffart
) {
893 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
894 5bfac5a3 Florent Chuffart
    return(NULL)
895 5bfac5a3 Florent Chuffart
  } else {
896 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
897 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
898 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
899 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
900 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
901 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
902 5bfac5a3 Florent Chuffart
    return(regions)
903 5bfac5a3 Florent Chuffart
  }
904 6b5a528c Florent Chuffart
}
905 6b5a528c Florent Chuffart
906 6b5a528c Florent Chuffart
907 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
908 4016229d Florent Chuffart
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
909 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
910 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
911 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
912 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
913 6b5a528c Florent Chuffart
) {
914 4016229d Florent Chuffart
  tr_roi = translate_cur(roi, strain, config = config)
915 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
916 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
917 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
918 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
919 6b5a528c Florent Chuffart
  }
920 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
921 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
922 6b5a528c Florent Chuffart
  }
923 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
924 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
925 6b5a528c Florent Chuffart
  }
926 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
927 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
928 6b5a528c Florent Chuffart
  }
929 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
930 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
931 6b5a528c Florent Chuffart
}
932 6b5a528c Florent Chuffart
933 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
934 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
935 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
936 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
937 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
938 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
939 6b5a528c Florent Chuffart
) {
940 6b5a528c Florent Chuffart
	all_reads = NULL
941 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
942 4016229d Florent Chuffart
    if (form == "unr") {
943 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
944 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
945 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
946 6b5a528c Florent Chuffart
      tmp_res$form = form
947 6b5a528c Florent Chuffart
    } else if (form == "wp") {
948 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
949 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
950 6b5a528c Florent Chuffart
      tmp_res$form = form
951 4016229d Florent Chuffart
    } else if (form == "wpunr") {
952 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
953 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
954 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
955 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
956 5bfac5a3 Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
957 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
958 4016229d Florent Chuffart
      tmp_res2$form = "unr"
959 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
960 6b5a528c Florent Chuffart
    }
961 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
962 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
963 55c1cdff Florent Chuffart
		}
964 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
965 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
966 6b5a528c Florent Chuffart
  }
967 5bfac5a3 Florent Chuffart
  return(all_reads)
968 6b5a528c Florent Chuffart
}
969 6b5a528c Florent Chuffart
970 6b5a528c Florent Chuffart
get_design = function(# Build the design for deseq
971 6b5a528c Florent Chuffart
### This function build the design according sample properties.
972 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
973 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
974 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
975 6b5a528c Florent Chuffart
) {
976 6b5a528c Florent Chuffart
  off1 = 0
977 6b5a528c Florent Chuffart
  off2 = 0
978 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
979 6b5a528c Florent Chuffart
	design_rownames = c()
980 6b5a528c Florent Chuffart
	design_manip = c()
981 6b5a528c Florent Chuffart
	design_strain = c()
982 5bfac5a3 Florent Chuffart
  off2index = function(off) {
983 55c1cdff Florent Chuffart
  	switch(toString(off),
984 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
985 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
986 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
987 6b5a528c Florent Chuffart
  		c(1,1,1)
988 5bfac5a3 Florent Chuffart
  		)
989 6b5a528c Florent Chuffart
  }
990 6b5a528c Florent Chuffart
	for (manip in manips) {
991 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
992 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
993 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
994 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
995 6b5a528c Florent Chuffart
		} else {
996 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
997 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
998 6b5a528c Florent Chuffart
			}
999 6b5a528c Florent Chuffart
		}
1000 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
1001 5bfac5a3 Florent Chuffart
		for (strain in combi) {
1002 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1003 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1004 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1005 6b5a528c Florent Chuffart
		}
1006 6b5a528c Florent Chuffart
	}
1007 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1008 5bfac5a3 Florent Chuffart
	return(snep_design)
1009 6b5a528c Florent Chuffart
}
1010 6b5a528c Florent Chuffart
1011 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1012 6b5a528c Florent Chuffart
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1013 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1014 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1015 6b5a528c Florent Chuffart
res, ##<< Data
1016 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1017 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1018 5bfac5a3 Florent Chuffart
) {
1019 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1020 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1021 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1022 6b5a528c Florent Chuffart
		row.names = cols,
1023 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1024 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1025 6b5a528c Florent Chuffart
		)
1026 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1027 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1028 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1029 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1030 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1031 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1032 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1033 6b5a528c Florent Chuffart
	}
1034 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1035 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1036 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1037 6b5a528c Florent Chuffart
	}
1038 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1039 6b5a528c Florent Chuffart
}
1040 6b5a528c Florent Chuffart
1041 6b5a528c Florent Chuffart
analyse_design = function(# Launch deseq methods.
1042 6b5a528c Florent Chuffart
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1043 6b5a528c Florent Chuffart
snep_design, ##<< The design to considere.
1044 55c1cdff Florent Chuffart
reads ##<< The data to considere.
1045 6b5a528c Florent Chuffart
) {
1046 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1047 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1048 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1049 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1050 6b5a528c Florent Chuffart
1051 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1052 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1053 6b5a528c Florent Chuffart
1054 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1055 5bfac5a3 Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))
1056 6b5a528c Florent Chuffart
}
1057 6b5a528c Florent Chuffart
1058 6b5a528c Florent Chuffart
1059 6b5a528c Florent Chuffart
1060 6b5a528c Florent Chuffart
1061 6b5a528c Florent Chuffart
1062 6b5a528c Florent Chuffart
1063 6b5a528c Florent Chuffart
1064 6b5a528c Florent Chuffart
1065 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1066 55c1cdff Florent Chuffart
### This function uses
1067 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1068 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1069 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1070 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1071 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1072 6b5a528c Florent Chuffart
) {
1073 6b5a528c Florent Chuffart
  # PRETREAT
1074 4016229d Florent Chuffart
  snep_design = get_design(marker, combi, all_samples)
1075 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1076 6b5a528c Florent Chuffart
  # RUN ANALYSE
1077 4016229d Florent Chuffart
  tmp_analyse = analyse_design(snep_design, reads)
1078 6b5a528c Florent Chuffart
  # RESULTS
1079 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1080 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1081 6b5a528c Florent Chuffart
  k = names(fit1)
1082 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1083 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1084 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1085 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1086 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1087 4016229d Florent Chuffart
  # print(snep_design)
1088 6b5a528c Florent Chuffart
	fdr = 0.0001
1089 55c1cdff Florent Chuffart
	thres = FDR(reads$pvalsGLM, fdr)
1090 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1091 6b5a528c Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1092 6b5a528c Florent Chuffart
  return(reads)
1093 5bfac5a3 Florent Chuffart
  },  ex=function(){
1094 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1095 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1096 4016229d Florent Chuffart
    form = "wpunr" # "wp" | "unr" | "wpunr"
1097 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1098 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1099 6b5a528c Florent Chuffart
})
1100 6b5a528c Florent Chuffart
1101 6b5a528c Florent Chuffart
1102 6b5a528c Florent Chuffart
1103 6b5a528c Florent Chuffart
1104 6b5a528c Florent Chuffart
1105 6b5a528c Florent Chuffart
1106 6b5a528c Florent Chuffart
1107 6b5a528c Florent Chuffart
1108 6b5a528c Florent Chuffart
1109 6b5a528c Florent Chuffart
1110 6b5a528c Florent Chuffart
1111 6b5a528c Florent Chuffart
1112 6b5a528c Florent Chuffart
1113 6b5a528c Florent Chuffart
1114 6b5a528c Florent Chuffart
1115 6b5a528c Florent Chuffart
1116 6b5a528c Florent Chuffart
1117 6b5a528c Florent Chuffart
1118 6b5a528c Florent Chuffart
1119 6b5a528c Florent Chuffart
1120 6b5a528c Florent Chuffart
1121 6b5a528c Florent Chuffart
1122 6b5a528c Florent Chuffart
1123 6b5a528c Florent Chuffart
1124 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1125 1d833b97 Florent Chuffart
### Util to convert Roman to Arabic
1126 1d833b97 Florent Chuffart
){list(
1127 1d833b97 Florent Chuffart
  "I" = 1,
1128 1d833b97 Florent Chuffart
  "II" = 2,
1129 1d833b97 Florent Chuffart
  "III" = 3,
1130 1d833b97 Florent Chuffart
  "IV" = 4,
1131 1d833b97 Florent Chuffart
  "V" = 5,
1132 1d833b97 Florent Chuffart
  "VI" = 6,
1133 1d833b97 Florent Chuffart
  "VII" = 7,
1134 1d833b97 Florent Chuffart
  "VIII" = 8,
1135 1d833b97 Florent Chuffart
  "IX" = 9,
1136 1d833b97 Florent Chuffart
  "X" = 10,
1137 1d833b97 Florent Chuffart
  "XI" = 11,
1138 1d833b97 Florent Chuffart
  "XII" = 12,
1139 1d833b97 Florent Chuffart
  "XIII" = 13,
1140 1d833b97 Florent Chuffart
  "XIV" = 14,
1141 1d833b97 Florent Chuffart
  "XV" = 15,
1142 1d833b97 Florent Chuffart
  "XVI" = 16,
1143 1d833b97 Florent Chuffart
  "XVII" = 17,
1144 1d833b97 Florent Chuffart
  "XVIII" = 18,
1145 1d833b97 Florent Chuffart
  "XIX" = 19,
1146 1d833b97 Florent Chuffart
  "XX" = 20
1147 1d833b97 Florent Chuffart
)}
1148 1d833b97 Florent Chuffart
1149 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1150 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1151 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1152 1d833b97 Florent Chuffart
) {
1153 1d833b97 Florent Chuffart
	ret = list()
1154 1d833b97 Florent Chuffart
	for (name in names(l)) {
1155 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1156 1d833b97 Florent Chuffart
	}
1157 1d833b97 Florent Chuffart
	ret
1158 1d833b97 Florent Chuffart
### The switched pairlist.
1159 1d833b97 Florent Chuffart
}, ex=function(){
1160 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1161 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1162 1d833b97 Florent Chuffart
})
1163 1d833b97 Florent Chuffart
1164 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1165 1d833b97 Florent Chuffart
### Util to convert Arabicto Roman
1166 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1167 1d833b97 Florent Chuffart
1168 1d833b97 Florent Chuffart
1169 1d833b97 Florent Chuffart
1170 1d833b97 Florent Chuffart
1171 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1172 5bfac5a3 Florent Chuffart
### This fonction allow to acces to a specific part of the c2c file.
1173 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1174 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1175 5bfac5a3 Florent Chuffart
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1176 5bfac5a3 Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1177 5bfac5a3 Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1178 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1179 1d833b97 Florent Chuffart
) {
1180 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1181 1d833b97 Florent Chuffart
	# Launch c2c file
1182 1d833b97 Florent Chuffart
	if (reverse) {
1183 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1184 1d833b97 Florent Chuffart
	} else {
1185 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1186 1d833b97 Florent Chuffart
	}
1187 5bfac5a3 Florent Chuffart
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1188 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1189 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1190 1d833b97 Florent Chuffart
	# Reverse
1191 1d833b97 Florent Chuffart
	if (reverse) {
1192 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1193 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1194 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1195 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1196 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1197 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1198 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1199 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1200 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1201 1d833b97 Florent Chuffart
	}
1202 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1203 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1204 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1205 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1206 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1207 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1208 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1209 5bfac5a3 Florent Chuffart
  	}
1210 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1211 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1212 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1213 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1214 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1215 5bfac5a3 Florent Chuffart
    }
1216 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1217 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1218 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1219 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1220 5bfac5a3 Florent Chuffart
    }
1221 5bfac5a3 Florent Chuffart
  }
1222 5bfac5a3 Florent Chuffart
  return(c2c)
1223 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1224 5bfac5a3 Florent Chuffart
}
1225 5bfac5a3 Florent Chuffart
1226 4016229d Florent Chuffart
translate_cur = structure(function(# Translate coords of a genome region.
1227 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1228 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1229 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1230 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1231 4016229d Florent Chuffart
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1232 5bfac5a3 Florent Chuffart
) {
1233 5bfac5a3 Florent Chuffart
	strain1 = roi$strain_ref
1234 5bfac5a3 Florent Chuffart
	if (strain1 == strain2) {
1235 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1236 5bfac5a3 Florent Chuffart
		return(roi)
1237 5bfac5a3 Florent Chuffart
	}
1238 5bfac5a3 Florent Chuffart
1239 5bfac5a3 Florent Chuffart
	# Extract c2c file
1240 4016229d Florent Chuffart
	if (!is.null(big_cur)) {
1241 4016229d Florent Chuffart
  	# Dealing with big_cur
1242 4016229d Florent Chuffart
		if (roi$strain_ref != big_cur$strain_ref) {
1243 4016229d Florent Chuffart
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1244 62d69d53 Florent Chuffart
    }
1245 4016229d Florent Chuffart
    if (big_cur$end < big_cur$begin) {
1246 4016229d Florent Chuffart
      tmp_var = big_cur$begin
1247 4016229d Florent Chuffart
      big_cur$begin = big_cur$end
1248 4016229d Florent Chuffart
      big_cur$end = tmp_var
1249 4016229d Florent Chuffart
      big_cur$length = big_cur$end - big_cur$begin + 1
1250 55c1cdff Florent Chuffart
    }
1251 4016229d Florent Chuffart
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1252 4016229d Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1253 ded736c8 Florent Chuffart
      return(NULL)
1254 ded736c8 Florent Chuffart
    }
1255 4016229d Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1256 5bfac5a3 Florent Chuffart
  } else {
1257 4016229d Florent Chuffart
    # No big_cur
1258 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1259 5bfac5a3 Florent Chuffart
  }
1260 5bfac5a3 Florent Chuffart
  
1261 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1262 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1263 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1264 1d833b97 Florent Chuffart
  end_1 = roi$end
1265 5bfac5a3 Florent Chuffart
  if (reverse) {
1266 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1267 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1268 1d833b97 Florent Chuffart
	} else {
1269 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1270 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1271 1d833b97 Florent Chuffart
	}
1272 5bfac5a3 Florent Chuffart
1273 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1274 1d833b97 Florent Chuffart
	{
1275 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1276 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1277 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1278 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1279 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1280 54cc8283 Florent Chuffart
      } else {
1281 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1282 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1283 54cc8283 Florent Chuffart
      }
1284 1d833b97 Florent Chuffart
			if (reverse) {
1285 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1286 1d833b97 Florent Chuffart
			} else {
1287 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1288 1d833b97 Florent Chuffart
			}
1289 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1290 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1291 1d833b97 Florent Chuffart
					return(NULL)
1292 4016229d Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1293 55c1cdff Florent Chuffart
				} else {
1294 1d833b97 Florent Chuffart
					print(tmptransfostart)
1295 1d833b97 Florent Chuffart
					print(tmptransfostop)
1296 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1297 1d833b97 Florent Chuffart
				}
1298 1d833b97 Florent Chuffart
			}
1299 55c1cdff Florent Chuffart
		}
1300 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1301 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1302 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1303 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1304 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1305 54cc8283 Florent Chuffart
      } else {
1306 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1307 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1308 54cc8283 Florent Chuffart
      }
1309 1d833b97 Florent Chuffart
			if (reverse) {
1310 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1311 1d833b97 Florent Chuffart
			} else {
1312 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1313 1d833b97 Florent Chuffart
			}
1314 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1315 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1316 1d833b97 Florent Chuffart
					return(NULL)
1317 4016229d Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1318 1d833b97 Florent Chuffart
				} else {
1319 1d833b97 Florent Chuffart
					print(tmptransfostart)
1320 1d833b97 Florent Chuffart
					print(tmptransfostop)
1321 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1322 55c1cdff Florent Chuffart
				}
1323 55c1cdff Florent Chuffart
			}
1324 1d833b97 Florent Chuffart
		}
1325 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1326 1d833b97 Florent Chuffart
			# print("many start")
1327 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1328 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1329 1d833b97 Florent Chuffart
				print(tmptransfostart)
1330 1d833b97 Florent Chuffart
				print(tmptransfostop)
1331 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1332 1d833b97 Florent Chuffart
			}
1333 1d833b97 Florent Chuffart
		}
1334 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1335 1d833b97 Florent Chuffart
			# print("many stop")
1336 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1337 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1338 1d833b97 Florent Chuffart
				print(tmptransfostart)
1339 1d833b97 Florent Chuffart
				print(tmptransfostop)
1340 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1341 1d833b97 Florent Chuffart
			}
1342 5bfac5a3 Florent Chuffart
		}
1343 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1344 1d833b97 Florent Chuffart
			print(tmptransfostart)
1345 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1346 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1347 1d833b97 Florent Chuffart
		}
1348 55c1cdff Florent Chuffart
	}
1349 5bfac5a3 Florent Chuffart
  
1350 1d833b97 Florent Chuffart
  # Deal with strand
1351 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1352 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1353 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1354 1d833b97 Florent Chuffart
  } else {
1355 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1356 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1357 1d833b97 Florent Chuffart
  }
1358 1d833b97 Florent Chuffart
	# Build returned roi
1359 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1360 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1361 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1362 1d833b97 Florent Chuffart
	} else {
1363 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1364 1d833b97 Florent Chuffart
	}
1365 1d833b97 Florent Chuffart
  roi$begin = begin_2
1366 1d833b97 Florent Chuffart
  roi$end = end_2
1367 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1368 5bfac5a3 Florent Chuffart
		roi$length = 1
1369 5bfac5a3 Florent Chuffart
	} else {
1370 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1371 1d833b97 Florent Chuffart
	}
1372 55c1cdff Florent Chuffart
  return(roi)
1373 1d833b97 Florent Chuffart
}, ex=function(){
1374 4016229d Florent Chuffart
	# Define new translate_cur function...
1375 4016229d Florent Chuffart
	translate_cur = function(roi, strain2, config) {
1376 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1377 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1378 1d833b97 Florent Chuffart
			return(roi)
1379 1d833b97 Florent Chuffart
		} else {
1380 4016229d Florent Chuffart
		  stop("Here is my new translate_cur function...")
1381 5bfac5a3 Florent Chuffart
		}
1382 1d833b97 Florent Chuffart
	}
1383 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1384 4016229d Florent Chuffart
	# unlockBinding("translate_cur", as.environment("package:nm"))
1385 4016229d Florent Chuffart
	# unlockBinding("translate_cur", getNamespace("nm"))
1386 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, "package:nm")
1387 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1388 4016229d Florent Chuffart
	# lockBinding("translate_cur", getNamespace("nm"))
1389 4016229d Florent Chuffart
	# lockBinding("translate_cur", as.environment("package:nm"))
1390 1d833b97 Florent Chuffart
})
1391 1d833b97 Florent Chuffart
1392 1d833b97 Florent Chuffart
1393 5bfac5a3 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1394 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1395 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1396 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1397 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1398 5bfac5a3 Florent Chuffart
) {
1399 1d833b97 Florent Chuffart
1400 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1401 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1402 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1403 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1404 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1405 5bfac5a3 Florent Chuffart
    } else {
1406 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1407 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1408 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1409 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1410 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1411 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1412 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1413 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1414 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1415 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1416 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1417 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1418 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1419 5bfac5a3 Florent Chuffart
      }
1420 5bfac5a3 Florent Chuffart
      return(NULL)
1421 5bfac5a3 Florent Chuffart
    }
1422 5bfac5a3 Florent Chuffart
  }
1423 1d833b97 Florent Chuffart
1424 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1425 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1426 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1427 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1428 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1429 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1430 5bfac5a3 Florent Chuffart
  	}
1431 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1432 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1433 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1434 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1435 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1436 5bfac5a3 Florent Chuffart
  	}
1437 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1438 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1439 5bfac5a3 Florent Chuffart
  	}
1440 5bfac5a3 Florent Chuffart
  }
1441 1d833b97 Florent Chuffart
1442 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1443 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1444 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1445 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1446 5bfac5a3 Florent Chuffart
      if (!ok) {
1447 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1448 5bfac5a3 Florent Chuffart
      }
1449 5bfac5a3 Florent Chuffart
      return (ok)
1450 5bfac5a3 Florent Chuffart
    }
1451 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1452 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1453 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1454 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1455 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1456 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1457 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1458 5bfac5a3 Florent Chuffart
        return(roi)
1459 5bfac5a3 Florent Chuffart
      })
1460 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1461 5bfac5a3 Florent Chuffart
    }
1462 5bfac5a3 Florent Chuffart
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1463 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1464 5bfac5a3 Florent Chuffart
    STOP = FALSE
1465 5bfac5a3 Florent Chuffart
    nb_round = 0
1466 5bfac5a3 Florent Chuffart
    while(!STOP) {
1467 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1468 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1469 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1470 4016229d Florent Chuffart
      fooyjm = translate_curs(foo, "YJM", config=config)
1471 4016229d Florent Chuffart
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1472 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1473 5bfac5a3 Florent Chuffart
        print("case 1")
1474 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1475 5bfac5a3 Florent Chuffart
    		next
1476 5bfac5a3 Florent Chuffart
      }
1477 4016229d Florent Chuffart
      foorm = translate_curs(foo, "RM", config=config)
1478 4016229d Florent Chuffart
      foormby = translate_curs(foorm, "BY", config=config)
1479 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1480 5bfac5a3 Florent Chuffart
        print("case 2")
1481 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1482 5bfac5a3 Florent Chuffart
    		next
1483 5bfac5a3 Florent Chuffart
      }
1484 4016229d Florent Chuffart
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1485 4016229d Florent Chuffart
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1486 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1487 5bfac5a3 Florent Chuffart
        print("case 3")
1488 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1489 5bfac5a3 Florent Chuffart
        next
1490 5bfac5a3 Florent Chuffart
      }
1491 4016229d Florent Chuffart
      foormyjm = translate_curs(foorm, "YJM", config=config)
1492 4016229d Florent Chuffart
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1493 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1494 5bfac5a3 Florent Chuffart
        print("case 4")
1495 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1496 5bfac5a3 Florent Chuffart
        next
1497 5bfac5a3 Florent Chuffart
      }
1498 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1499 5bfac5a3 Florent Chuffart
      STOP = TRUE
1500 5bfac5a3 Florent Chuffart
    }
1501 5bfac5a3 Florent Chuffart
    STOP = FALSE
1502 5bfac5a3 Florent Chuffart
    nb_round = 0
1503 5bfac5a3 Florent Chuffart
    while(!STOP) {
1504 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1505 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1506 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1507 4016229d Florent Chuffart
      foobyrm = translate_curs(fooby, "RM", config=config)
1508 4016229d Florent Chuffart
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1509 4016229d Florent Chuffart
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1510 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1511 5bfac5a3 Florent Chuffart
        print("case 1")
1512 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1513 5bfac5a3 Florent Chuffart
      }
1514 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1515 4016229d Florent Chuffart
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1516 4016229d Florent Chuffart
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1517 4016229d Florent Chuffart
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1518 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1519 5bfac5a3 Florent Chuffart
        print("case 2")
1520 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1521 5bfac5a3 Florent Chuffart
        next
1522 5bfac5a3 Florent Chuffart
      }
1523 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1524 5bfac5a3 Florent Chuffart
      STOP = TRUE
1525 5bfac5a3 Florent Chuffart
    }
1526 5bfac5a3 Florent Chuffart
    print("end")
1527 5bfac5a3 Florent Chuffart
    return(foo)
1528 5bfac5a3 Florent Chuffart
  }
1529 1d833b97 Florent Chuffart
1530 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1531 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1532 5bfac5a3 Florent Chuffart
    # computing diffs
1533 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1534 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1535 1d833b97 Florent Chuffart
    # Filtering
1536 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1537 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1538 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1539 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1540 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1541 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1542 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1543 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1544 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1545 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1546 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1547 5bfac5a3 Florent Chuffart
  		} else {
1548 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1549 1d833b97 Florent Chuffart
  		}
1550 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1551 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1552 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1553 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1554 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1555 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1556 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1557 5bfac5a3 Florent Chuffart
          # print(check)
1558 5bfac5a3 Florent Chuffart
      		roi1 = roi
1559 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1560 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1561 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1562 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1563 5bfac5a3 Florent Chuffart
          }
1564 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1565 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1566 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1567 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1568 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1569 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1570 5bfac5a3 Florent Chuffart
          } else {
1571 5bfac5a3 Florent Chuffart
            check = NULL
1572 5bfac5a3 Florent Chuffart
            roi = NULL
1573 5bfac5a3 Florent Chuffart
          }
1574 5bfac5a3 Florent Chuffart
        }
1575 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1576 1d833b97 Florent Chuffart
  	  }
1577 5bfac5a3 Florent Chuffart
    })
1578 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1579 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1580 1d833b97 Florent Chuffart
  	return(rois)
1581 1d833b97 Florent Chuffart
  }
1582 1d833b97 Florent Chuffart
1583 4016229d Florent Chuffart
  translate_curs = function(rois, target_strain, config) {
1584 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1585 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1586 4016229d Florent Chuffart
      tr_roi = translate_cur(roi, target_strain, config=config)  
1587 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1588 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1589 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1590 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1591 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1592 5bfac5a3 Florent Chuffart
      return(tr_roi)
1593 5bfac5a3 Florent Chuffart
    })
1594 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1595 5bfac5a3 Florent Chuffart
    return(tr_rois)
1596 5bfac5a3 Florent Chuffart
  }
1597 729c934e Florent Chuffart
1598 5bfac5a3 Florent Chuffart
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1599 5bfac5a3 Florent Chuffart
  rois = list()
1600 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1601 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1602 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1603 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1604 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1605 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1606 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1607 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1608 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1609 4016229d Florent Chuffart
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1610 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1611 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1612 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1613 5bfac5a3 Florent Chuffart
  }
1614 5bfac5a3 Florent Chuffart
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1615 5bfac5a3 Florent Chuffart
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1616 4016229d Florent Chuffart
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1617 5bfac5a3 Florent Chuffart
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1618 5bfac5a3 Florent Chuffart
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1619 4016229d Florent Chuffart
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1620 5bfac5a3 Florent Chuffart
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1621 5bfac5a3 Florent Chuffart
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1622 5bfac5a3 Florent Chuffart
  return (squeezed_rois)
1623 1d833b97 Florent Chuffart
}
1624 1d833b97 Florent Chuffart
1625 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1626 5bfac5a3 Florent Chuffart
### This function...
1627 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1628 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1629 5bfac5a3 Florent Chuffart
) {
1630 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1631 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1632 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1633 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1634 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1635 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1636 5bfac5a3 Florent Chuffart
                                , ]
1637 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1638 5bfac5a3 Florent Chuffart
      print("removing a region")
1639 5bfac5a3 Florent Chuffart
      return(NULL)
1640 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1641 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1642 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1643 5bfac5a3 Florent Chuffart
    } else {
1644 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1645 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1646 5bfac5a3 Florent Chuffart
        print("not the same begin")
1647 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1648 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1649 5bfac5a3 Florent Chuffart
      }
1650 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1651 5bfac5a3 Florent Chuffart
        print("not the same end")
1652 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1653 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1654 5bfac5a3 Florent Chuffart
      }
1655 5bfac5a3 Florent Chuffart
      return(roi1)
1656 5bfac5a3 Florent Chuffart
    }         
1657 5bfac5a3 Florent Chuffart
  })
1658 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1659 5bfac5a3 Florent Chuffart
}
1660 1d833b97 Florent Chuffart
1661 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1662 61914235 Florent Chuffart
### This function loads in memory data corresponding to the given experiments.
1663 61914235 Florent Chuffart
expe, ##<< a list of vector corresponding to vector of replicates.
1664 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1665 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1666 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1667 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1668 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1669 61914235 Florent Chuffart
) {
1670 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1671 61914235 Florent Chuffart
  	samples=list()
1672 61914235 Florent Chuffart
  	for (i in samples_ids) {
1673 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1674 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1675 4016229d Florent Chuffart
      sample$roi = translate_cur(roi, sample$strain, config = config)
1676 61914235 Florent Chuffart
  		if (get_genome) {
1677 61914235 Florent Chuffart
  			# Get Genome
1678 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1679 5bfac5a3 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1680 61914235 Florent Chuffart
  		}
1681 61914235 Florent Chuffart
  		# Get inputs
1682 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1683 55c1cdff Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1684 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
1685 61914235 Florent Chuffart
  		if (!only_fetch) {
1686 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1687 61914235 Florent Chuffart
  	  }
1688 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1689 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1690 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1691 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1692 61914235 Florent Chuffart
  			if (!only_fetch) {
1693 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1694 61914235 Florent Chuffart
    		}
1695 61914235 Florent Chuffart
  		}
1696 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
1697 61914235 Florent Chuffart
  	}
1698 61914235 Florent Chuffart
  	return(samples)
1699 61914235 Florent Chuffart
  }
1700 61914235 Florent Chuffart
	replicates = list()
1701 61914235 Florent Chuffart
	for(samples_ids in expe) {
1702 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1703 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1704 61914235 Florent Chuffart
	}
1705 61914235 Florent Chuffart
	return(replicates)
1706 61914235 Florent Chuffart
  }, ex = function() {
1707 61914235 Florent Chuffart
    # library(rjson)
1708 61914235 Florent Chuffart
    # library(nucleominer)
1709 55c1cdff Florent Chuffart
    #
1710 61914235 Florent Chuffart
    # # Read config file
1711 61914235 Florent Chuffart
    # json_conf_file = "nucleo_miner_config.json"
1712 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1713 61914235 Florent Chuffart
    # # Read sample file
1714 55c1cdff Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1715 61914235 Florent Chuffart
    # # here are the sample ids in a list
1716 61914235 Florent Chuffart
    # expes = list(c(1))
1717 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1718 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1719 61914235 Florent Chuffart
    # # it displays the corverage
1720 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1721 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
1722 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
1723 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
1724 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
1725 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
1726 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
1727 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
1728 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
1729 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
1730 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
1731 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
1732 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
1733 61914235 Florent Chuffart
    #       height = 50)
1734 61914235 Florent Chuffart
  })
1735 61914235 Florent Chuffart
1736 61914235 Florent Chuffart
1737 61914235 Florent Chuffart
1738 61914235 Florent Chuffart
1739 61914235 Florent Chuffart
1740 61914235 Florent Chuffart
1741 61914235 Florent Chuffart
1742 61914235 Florent Chuffart
1743 61914235 Florent Chuffart
1744 61914235 Florent Chuffart
1745 61914235 Florent Chuffart
1746 61914235 Florent Chuffart
1747 61914235 Florent Chuffart
1748 61914235 Florent Chuffart
1749 61914235 Florent Chuffart
1750 61914235 Florent Chuffart
1751 61914235 Florent Chuffart
1752 61914235 Florent Chuffart
1753 61914235 Florent Chuffart
1754 61914235 Florent Chuffart
1755 61914235 Florent Chuffart
1756 61914235 Florent Chuffart
1757 61914235 Florent Chuffart
1758 61914235 Florent Chuffart
1759 61914235 Florent Chuffart
1760 61914235 Florent Chuffart
1761 61914235 Florent Chuffart
1762 61914235 Florent Chuffart
1763 61914235 Florent Chuffart
1764 61914235 Florent Chuffart
1765 61914235 Florent Chuffart
1766 61914235 Florent Chuffart
1767 61914235 Florent Chuffart
1768 1d833b97 Florent Chuffart
1769 1d833b97 Florent Chuffart
1770 1d833b97 Florent Chuffart
1771 1d833b97 Florent Chuffart
1772 1d833b97 Florent Chuffart
1773 1d833b97 Florent Chuffart
1774 1d833b97 Florent Chuffart
1775 1d833b97 Florent Chuffart
1776 1d833b97 Florent Chuffart
1777 1d833b97 Florent Chuffart
1778 1d833b97 Florent Chuffart
1779 1d833b97 Florent Chuffart
1780 1d833b97 Florent Chuffart
1781 1d833b97 Florent Chuffart
1782 1d833b97 Florent Chuffart
1783 1d833b97 Florent Chuffart
1784 1d833b97 Florent Chuffart
1785 1d833b97 Florent Chuffart
1786 1d833b97 Florent Chuffart
1787 1d833b97 Florent Chuffart
1788 1d833b97 Florent Chuffart
1789 1d833b97 Florent Chuffart
1790 1d833b97 Florent Chuffart
1791 1d833b97 Florent Chuffart
1792 1d833b97 Florent Chuffart
1793 1d833b97 Florent Chuffart
1794 1d833b97 Florent Chuffart
1795 1d833b97 Florent Chuffart
1796 1d833b97 Florent Chuffart
1797 1d833b97 Florent Chuffart
1798 1d833b97 Florent Chuffart
1799 1d833b97 Florent Chuffart
1800 1d833b97 Florent Chuffart
1801 1d833b97 Florent Chuffart
1802 6b5a528c Florent Chuffart
1803 6b5a528c Florent Chuffart
1804 6b5a528c Florent Chuffart
1805 6b5a528c Florent Chuffart
1806 6b5a528c Florent Chuffart
1807 6b5a528c Florent Chuffart
1808 6b5a528c Florent Chuffart
1809 6b5a528c Florent Chuffart
1810 6b5a528c Florent Chuffart
1811 6b5a528c Florent Chuffart
1812 6b5a528c Florent Chuffart
1813 6b5a528c Florent Chuffart
1814 6b5a528c Florent Chuffart
1815 6b5a528c Florent Chuffart
1816 6b5a528c Florent Chuffart
1817 6b5a528c Florent Chuffart
1818 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
1819 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
1820 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
1821 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
1822 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1823 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1824 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1825 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1826 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1827 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1828 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1829 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1830 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
1831 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1832 9fdbfada Florent Chuffart
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1833 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1834 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1835 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1836 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1837 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1838 6e0010bc Florent Chuffart
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1839 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1840 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1841 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1842 6e0010bc Florent Chuffart
main=NULL, ##<< main title of the produced plot
1843 6e0010bc Florent Chuffart
xlab=NULL, ##<< xlab of the produced plot
1844 6e0010bc Florent Chuffart
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1845 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1846 6b5a528c Florent Chuffart
){
1847 6b5a528c Florent Chuffart
  returned_list = list()
1848 6b5a528c Florent Chuffart
  # Computing global display parameters
1849 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1850 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1851 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1852 6b5a528c Florent Chuffart
  } else {
1853 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1854 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1855 6b5a528c Florent Chuffart
  }
1856 6b5a528c Florent Chuffart
	base_glo = 0
1857 6b5a528c Florent Chuffart
	nb_rank_glo = 0
1858 6b5a528c Florent Chuffart
  for (samples in replicates) {
1859 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
1860 6b5a528c Florent Chuffart
  }
1861 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1862 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
1863 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
1864 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
1865 6b5a528c Florent Chuffart
  # Plot main frame
1866 6e0010bc Florent Chuffart
  if (is.null(xlab)) {
1867 6e0010bc Florent Chuffart
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1868 6e0010bc Florent Chuffart
  }
1869 6e0010bc Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1870 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1871 6b5a528c Florent Chuffart
  # Go
1872 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
1873 9fdbfada Florent Chuffart
  wp_maps = list()
1874 9fdbfada Florent Chuffart
  fuzzy_maps = list()
1875 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
1876 6b5a528c Florent Chuffart
		# Computing replicate parameters
1877 6b5a528c Florent Chuffart
		nb_rank = length(samples)
1878 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
1879 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
1880 6b5a528c Florent Chuffart
		y_min = min(ylim)
1881 6b5a528c Florent Chuffart
		y_max = max(ylim)
1882 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
1883 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
1884 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
1885 6b5a528c Florent Chuffart
			# computing sample parameters
1886 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
1887 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1888 6e0010bc Florent Chuffart
      if (plot_sample_id) {
1889 6e0010bc Florent Chuffart
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1890 6e0010bc Florent Chuffart
      }
1891 6b5a528c Florent Chuffart
1892 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1893 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
1894 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
1895 6b5a528c Florent Chuffart
		  } else {
1896 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
1897 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
1898 5bfac5a3 Florent Chuffart
		  }
1899 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
1900 6b5a528c Florent Chuffart
	    # Plot Genome seq
1901 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
1902 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1903 6b5a528c Florent Chuffart
		  }
1904 6b5a528c Florent Chuffart
			# Plot reads
1905 6b5a528c Florent Chuffart
			reads = sample$inputs
1906 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
1907 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
1908 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1909 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
1910 6b5a528c Florent Chuffart
			}
1911 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
1912 6b5a528c Florent Chuffart
        # require(plotrix)
1913 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1914 6b5a528c Florent Chuffart
			}
1915 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
1916 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
1917 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
1918 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
1919 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1920 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
1921 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
1922 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
1923 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1924 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1925 a52f85d7 Florent Chuffart
          }
1926 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
1927 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
1928 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
1929 a52f85d7 Florent Chuffart
          tmp_current_level = 0
1930 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
1931 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
1932 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
1933 a52f85d7 Florent Chuffart
          }
1934 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
1935 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
1936 a52f85d7 Florent Chuffart
        } else {
1937 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
1938 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
1939 6b5a528c Florent Chuffart
        }
1940 0d1475ac Florent Chuffart
        # print(steps_x)
1941 0d1475ac Florent Chuffart
        # print(steps_y)
1942 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1943 6b5a528c Florent Chuffart
        abline(y_lev,0)
1944 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1945 6b5a528c Florent Chuffart
			}
1946 6b5a528c Florent Chuffart
			# Plot nucs
1947 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1948 6b5a528c Florent Chuffart
				nucs = sample$outputs
1949 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
1950 6b5a528c Florent Chuffart
					col = 1
1951 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
1952 6e0010bc Florent Chuffart
            foo<<-nucs
1953 7646593d Florent Chuffart
            if (change_col) {
1954 55c1cdff Florent Chuffart
  						col = col + 1
1955 6e0010bc Florent Chuffart
              } else {
1956 6e0010bc Florent Chuffart
                col = "blue"
1957 6e0010bc Florent Chuffart
              }
1958 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
1959 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1960 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1961 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
1962 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
1963 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1964 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
1965 6b5a528c Florent Chuffart
						}
1966 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1967 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1968 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1969 6b5a528c Florent Chuffart
		  			}
1970 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
1971 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
1972 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1973 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1974 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1975 6b5a528c Florent Chuffart
	          }
1976 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
1977 6e0010bc Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1978 6b5a528c Florent Chuffart
	          }
1979 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
1980 6b5a528c Florent Chuffart
				      # require(plotrix)
1981 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1982 6b5a528c Florent Chuffart
						}
1983 6b5a528c Florent Chuffart
		      }
1984 6b5a528c Florent Chuffart
		    } else {
1985 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
1986 6b5a528c Florent Chuffart
		    }
1987 6b5a528c Florent Chuffart
			}
1988 6b5a528c Florent Chuffart
	  }
1989 55c1cdff Florent Chuffart
1990 6b5a528c Florent Chuffart
	  # Plot wp nucs
1991 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1992 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
1993 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
1994 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1995 6b5a528c Florent Chuffart
				} else {
1996 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1997 6b5a528c Florent Chuffart
				}
1998 6b5a528c Florent Chuffart
		  } else {
1999 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2000 6b5a528c Florent Chuffart
		  }
2001 729c934e Florent Chuffart
      if (plot_chain) {
2002 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
2003 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2004 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
2005 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
2006 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
2007 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
2008 729c934e Florent Chuffart
            # print(tmp_nuc)
2009 729c934e Florent Chuffart
            return(tmp_nuc)
2010 55c1cdff Florent Chuffart
          })
2011 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
2012 dc58ee6c Florent Chuffart
        })
2013 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2014 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2015 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2016 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
2017 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
2018 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2019 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
2020 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
2021 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
2022 6e0010bc Florent Chuffart
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2023 6e0010bc Florent Chuffart
        if (is.null(config$TRACK_LOD_OFFSET)) {
2024 6e0010bc Florent Chuffart
          config$TRACK_LOD_OFFSET = 0
2025 6e0010bc Florent Chuffart
        }
2026 6e0010bc Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
2027 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
2028 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
2029 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2030 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2031 dc58ee6c Florent Chuffart
        tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
2032 55c1cdff Florent Chuffart
        new_tmp_x = c()
2033 55c1cdff Florent Chuffart
        new_tmp_y = c()
2034 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2035 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
2036 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
2037 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2038 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2039 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2040 dc58ee6c Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lw=2)
2041 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2042 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lw=2)
2043 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2044 6e0010bc Florent Chuffart
        if (is.null(config$LEGEND_LOD_POS)) {
2045 6e0010bc Florent Chuffart
          pos = 2
2046 6e0010bc Florent Chuffart
        } else {
2047 6e0010bc Florent Chuffart
          pos = config$LEGEND_LOD_POS
2048 6e0010bc Florent Chuffart
        }
2049 6e0010bc Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, cex=1.5, pos=pos) 
2050 729c934e Florent Chuffart
      }
2051 55c1cdff Florent Chuffart
2052 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2053 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2054 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2055 9fdbfada Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
2056 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2057 9fdbfada Florent Chuffart
2058 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2059 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2060 4016229d Florent Chuffart
          if (!is.null(fuzzy_map)) {
2061 4016229d Florent Chuffart
            if (nrow(fuzzy_map) > 0) {
2062 4016229d Florent Chuffart
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2063 4016229d Florent Chuffart
            }
2064 d492bca4 Florent Chuffart
          }
2065 9fdbfada Florent Chuffart
        } 
2066 9fdbfada Florent Chuffart
2067 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2068 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2069 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2070 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2071 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2072 9fdbfada Florent Chuffart
      					all_original_reads = c()
2073 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2074 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2075 9fdbfada Florent Chuffart
      					}
2076 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2077 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2078 9fdbfada Florent Chuffart
    				  }
2079 729c934e Florent Chuffart
  				  }
2080 729c934e Florent Chuffart
  				}
2081 729c934e Florent Chuffart
  			}
2082 729c934e Florent Chuffart
      }
2083 6b5a528c Florent Chuffart
		}
2084 5bfac5a3 Florent Chuffart
	}
2085 55c1cdff Florent Chuffart
2086 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2087 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2088 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2089 b69443b5 Florent Chuffart
      if (!is.null(aligned_inter_strain_nucs)) {
2090 4016229d Florent Chuffart
        aligned_inter_strain_nucs$cur_index = "foo" 
2091 b69443b5 Florent Chuffart
      }
2092 9fdbfada Florent Chuffart
    }
2093 55c1cdff Florent Chuffart
2094 9fdbfada Florent Chuffart
    #Plot common wp nucs
2095 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2096 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2097 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2098 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2099 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2100 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2101 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2102 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2103 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2104 9fdbfada Florent Chuffart
2105 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2106 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2107 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2108 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2109 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2110 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2111 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2112 9fdbfada Florent Chuffart
        } else {
2113 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2114 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2115 6b5a528c Florent Chuffart
        }
2116 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2117 55c1cdff Florent Chuffart
      }
2118 9fdbfada Florent Chuffart
    }
2119 b69443b5 Florent Chuffart
    print(aligned_inter_strain_nucs)
2120 b69443b5 Florent Chuffart
    if (!is.null(aligned_inter_strain_nucs)) {
2121 b69443b5 Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2122 b69443b5 Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2123 b69443b5 Florent Chuffart
        tmp_xs = tmp_ys = c()
2124 b69443b5 Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2125 b69443b5 Florent Chuffart
          samples = replicates[[replicate_rank]]
2126 b69443b5 Florent Chuffart
          strain = samples[[1]]$strain
2127 b69443b5 Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2128 b69443b5 Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2129 b69443b5 Florent Chuffart
        }
2130 b69443b5 Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)
2131 55c1cdff Florent Chuffart
      }
2132 9fdbfada Florent Chuffart
    }
2133 9fdbfada Florent Chuffart
    
2134 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2135 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2136 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2137 4016229d Florent Chuffart
      cur_index = "foo"
2138 9fdbfada Florent Chuffart
      common_nuc_results = list()
2139 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2140 4016229d Florent Chuffart
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2141 6e0010bc Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lw=3, col=adjustcolor(4, alpha.f = 0.05))        
2142 9fdbfada Florent Chuffart
    }
2143 9fdbfada Florent Chuffart
2144 6b5a528c Florent Chuffart
	}
2145 6b5a528c Florent Chuffart
  return(returned_list)
2146 6b5a528c Florent Chuffart
}
2147 6b5a528c Florent Chuffart
2148 6b5a528c Florent Chuffart
2149 6b5a528c Florent Chuffart
2150 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2151 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2152 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2153 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2154 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2155 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2156 9fdbfada Florent Chuffart
) {
2157 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2158 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2159 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2160 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2161 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2162 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2163 9fdbfada Florent Chuffart
    }
2164 9fdbfada Florent Chuffart
  }
2165 9fdbfada Florent Chuffart
  return(fuzzy_map)
2166 9fdbfada Florent Chuffart
}
2167 9fdbfada Florent Chuffart
2168 9fdbfada Florent Chuffart
2169 9fdbfada Florent Chuffart
2170 9fdbfada Florent Chuffart
2171 9fdbfada Florent Chuffart
2172 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2173 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2174 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2175 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2176 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
2177 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2178 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2179 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2180 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2181 9fdbfada Florent Chuffart
) {
2182 4016229d Florent Chuffart
  # print(cur_index)
2183 9fdbfada Florent Chuffart
2184 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2185 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2186 4016229d Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2187 9fdbfada Florent Chuffart
2188 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2189 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2190 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2191 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2192 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2193 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2194 9fdbfada Florent Chuffart
  # Let's go!
2195 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2196 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2197 9fdbfada Florent Chuffart
      return(NULL)
2198 9fdbfada Florent Chuffart
    } else {
2199 9fdbfada Florent Chuffart
      return (index_nuc)
2200 9fdbfada Florent Chuffart
    }
2201 9fdbfada Florent Chuffart
  }))
2202 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2203 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2204 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2205 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2206 9fdbfada Florent Chuffart
  }
2207 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2208 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2209 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2210 9fdbfada Florent Chuffart
2211 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2212 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2213 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2214 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2215 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2216 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2217 9fdbfada Florent Chuffart
  # Let's go!
2218 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2219 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2220 9fdbfada Florent Chuffart
      return(NULL)
2221 9fdbfada Florent Chuffart
    } else {
2222 9fdbfada Florent Chuffart
      return (index_nuc)
2223 9fdbfada Florent Chuffart
    }
2224 9fdbfada Florent Chuffart
  }))
2225 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2226 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2227 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2228 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2229 9fdbfada Florent Chuffart
  }
2230 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2231 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2232 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2233 4016229d Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2234 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2235 9fdbfada Florent Chuffart
2236 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2237 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2238 9fdbfada Florent Chuffart
2239 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2240 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2241 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2242 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2243 9fdbfada Florent Chuffart
  # Let's go!
2244 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2245 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2246 9fdbfada Florent Chuffart
      return (index_nuc)
2247 9fdbfada Florent Chuffart
    } else {
2248 9fdbfada Florent Chuffart
      return(NULL)
2249 9fdbfada Florent Chuffart
    }
2250 9fdbfada Florent Chuffart
  }))
2251 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2252 9fdbfada Florent Chuffart
  
2253 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2254 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2255 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2256 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2257 9fdbfada Florent Chuffart
  # Let's go!
2258 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2259 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2260 9fdbfada Florent Chuffart
      return (index_nuc)
2261 9fdbfada Florent Chuffart
    } else {
2262 9fdbfada Florent Chuffart
      return(NULL)
2263 9fdbfada Florent Chuffart
    }
2264 9fdbfada Florent Chuffart
  }))
2265 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2266 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2267 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2268 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2269 9fdbfada Florent Chuffart
  } else {
2270 4016229d Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2271 9fdbfada Florent Chuffart
  }
2272 9fdbfada Florent Chuffart
2273 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2274 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2275 9fdbfada Florent Chuffart
2276 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2277 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2278 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2279 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2280 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2281 9fdbfada Florent Chuffart
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
2282 9fdbfada Florent Chuffart
2283 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2284 9fdbfada Florent Chuffart
  return (non_inter_unr)
2285 9fdbfada Florent Chuffart
}
2286 9fdbfada Florent Chuffart
2287 9fdbfada Florent Chuffart