Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ f0703205

Historique | Voir | Annoter | Télécharger (86,37 ko)

1 55c1cdff Florent Chuffart
# get_content = function(# Get content from cached
2 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
4 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
5 6b5a528c Florent Chuffart
# ) {
6 6b5a528c Florent Chuffart
#   UseMethod("get_content", obj)
7 55c1cdff Florent Chuffart
#   ### Returns the cached content of the current object.
8 6b5a528c Florent Chuffart
# }
9 55c1cdff Florent Chuffart
#
10 55c1cdff Florent Chuffart
# get_content.default = structure( function(# Get content from cached
11 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
13 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
14 6b5a528c Florent Chuffart
# ) {
15 6b5a528c Florent Chuffart
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16 6b5a528c Florent Chuffart
#     NM_CACHE <<- list()
17 6b5a528c Florent Chuffart
#   }
18 6b5a528c Florent Chuffart
#   if (is.null(NM_CACHE[[obj$filename]])) {
19 6b5a528c Florent Chuffart
#     print(paste("Loading file ",obj$filename, sep=""))
20 6b5a528c Florent Chuffart
#     tmp_content = my_read(obj, ...)
21 6b5a528c Florent Chuffart
#     print("affect it...")
22 6b5a528c Florent Chuffart
#     NM_CACHE[[obj$filename]] <<- tmp_content
23 6b5a528c Florent Chuffart
#     print("done.")
24 55c1cdff Florent Chuffart
#   }
25 6b5a528c Florent Chuffart
#   return(NM_CACHE[[obj$filename]])
26 55c1cdff Florent Chuffart
# ### Returns the cached content of the current object.
27 6b5a528c Florent Chuffart
# }, ex=function(){
28 6b5a528c Florent Chuffart
#   # Create a dataframe
29 6b5a528c Florent Chuffart
#   df = NULL
30 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32 6b5a528c Florent Chuffart
#   # Dump it into tmp file
33 6b5a528c Florent Chuffart
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34 6b5a528c Florent Chuffart
#   # Load it into cache using feature.
35 6b5a528c Florent Chuffart
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36 55c1cdff Florent Chuffart
#   class(cached_content_object) = "table"
37 6b5a528c Florent Chuffart
#   # First time it will be load into cache
38 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
39 6b5a528c Florent Chuffart
#   # Second time not
40 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
41 55c1cdff Florent Chuffart
#
42 6b5a528c Florent Chuffart
# })
43 55c1cdff Florent Chuffart
#
44 6b5a528c Florent Chuffart
# my_read = function(
45 55c1cdff Florent Chuffart
# ### Abstract my_read function.
46 6b5a528c Florent Chuffart
#   obj, ...) {
47 6b5a528c Florent Chuffart
#   UseMethod("my_read", obj)
48 6b5a528c Florent Chuffart
# }
49 55c1cdff Florent Chuffart
#
50 6b5a528c Florent Chuffart
# my_read.default = function(
51 55c1cdff Florent Chuffart
# ### Default my_read function.
52 6b5a528c Florent Chuffart
#   obj, ...){
53 55c1cdff Florent Chuffart
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
54 6b5a528c Florent Chuffart
# }
55 55c1cdff Florent Chuffart
#
56 6b5a528c Florent Chuffart
# my_read.fasta = function(
57 55c1cdff Florent Chuffart
# ### my_read function for fasta files.
58 6b5a528c Florent Chuffart
#   obj, ...){
59 6b5a528c Florent Chuffart
#   # require(seqinr)
60 6b5a528c Florent Chuffart
#   return(read.fasta(obj$filename, ...))
61 6b5a528c Florent Chuffart
# }
62 55c1cdff Florent Chuffart
#
63 6b5a528c Florent Chuffart
# my_read.table = function(
64 55c1cdff Florent Chuffart
# ### my_read function for table files.
65 6b5a528c Florent Chuffart
#   obj, ...){
66 6b5a528c Florent Chuffart
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67 55c1cdff Florent Chuffart
#     return(read.table(file=gzfile(obj$filename), ...))
68 55c1cdff Florent Chuffart
#   } else {
69 6b5a528c Florent Chuffart
#     return(read.table(file=obj$filename, ...))
70 6b5a528c Florent Chuffart
#   }
71 55c1cdff Florent Chuffart
# }
72 55c1cdff Florent Chuffart
#
73 6b5a528c Florent Chuffart
# my_read.cvs = function(
74 55c1cdff Florent Chuffart
# ### my_read function for cvs files.
75 6b5a528c Florent Chuffart
#   obj, ...){
76 6b5a528c Florent Chuffart
#   return(read.csv(file=obj$filename, ...))
77 55c1cdff Florent Chuffart
# }
78 6b5a528c Florent Chuffart
79 6b5a528c Florent Chuffart
80 6b5a528c Florent Chuffart
81 6b5a528c Florent Chuffart
82 6b5a528c Florent Chuffart
83 6b5a528c Florent Chuffart
84 6b5a528c Florent Chuffart
85 6b5a528c Florent Chuffart
86 6b5a528c Florent Chuffart
87 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
88 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89 55c1cdff Florent Chuffart
##author<< Gael Yvert,
90 6b5a528c Florent Chuffart
(
91 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
92 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
93 6b5a528c Florent Chuffart
) {
94 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
95 6b5a528c Florent Chuffart
  N = length(x)
96 6b5a528c Florent Chuffart
  i = 1;
97 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98 6b5a528c Florent Chuffart
  if (i == 1)
99 6b5a528c Florent Chuffart
    return (NA)
100 6b5a528c Florent Chuffart
  else
101 6b5a528c Florent Chuffart
    return( x[i-1] )
102 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
103 6b5a528c Florent Chuffart
}, ex=function(){
104 6b5a528c Florent Chuffart
  print("example")
105 6b5a528c Florent Chuffart
})
106 6b5a528c Florent Chuffart
107 6b5a528c Florent Chuffart
108 6b5a528c Florent Chuffart
109 6b5a528c Florent Chuffart
lod_score_vecs = structure(function # Likelihood ratio
110 6b5a528c Florent Chuffart
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111 6b5a528c Florent Chuffart
(
112 6b5a528c Florent Chuffart
x ,##<< First vector.
113 6b5a528c Florent Chuffart
y ##<< Second vector.
114 6b5a528c Florent Chuffart
) {
115 6b5a528c Florent Chuffart
	if (length(x) <=1 | length(y) <= 1) {
116 6b5a528c Florent Chuffart
		return(NA)
117 6b5a528c Florent Chuffart
	}
118 6b5a528c Florent Chuffart
  meanX = mean(x)
119 6b5a528c Florent Chuffart
  sdX = sd(x)
120 6b5a528c Florent Chuffart
  meanY = mean(y)
121 6b5a528c Florent Chuffart
  sdY = sd(y)
122 6b5a528c Florent Chuffart
  meanXY = mean(c(x,y))
123 6b5a528c Florent Chuffart
  sdXY = sd(c(x,y))
124 6b5a528c Florent Chuffart
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125 6b5a528c Florent Chuffart
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126 6b5a528c Florent Chuffart
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127 729c934e Florent Chuffart
  ratio = llX + llY - llXY
128 6b5a528c Florent Chuffart
  return(ratio)
129 6b5a528c Florent Chuffart
### Returns the likelihood ratio.
130 6b5a528c Florent Chuffart
}, ex=function(){
131 6b5a528c Florent Chuffart
  # LOD score for 2 set of values
132 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
133 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
134 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
135 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
136 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
137 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
138 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142 6b5a528c Florent Chuffart
  lod_score_vecs(x1,x2)
143 6b5a528c Florent Chuffart
 })
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
146 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
148 6b5a528c Florent Chuffart
		l ##<<  A list
149 6b5a528c Florent Chuffart
	) {
150 6b5a528c Florent Chuffart
  if (is.null(df)) {
151 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
152 6b5a528c Florent Chuffart
  } else {
153 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154 6b5a528c Florent Chuffart
  }
155 6b5a528c Florent Chuffart
  return(df)
156 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
157 6b5a528c Florent Chuffart
}, ex=function(){
158 6b5a528c Florent Chuffart
		## Here dataframe is NULL
159 6b5a528c Florent Chuffart
		print(df)
160 6b5a528c Florent Chuffart
		df = NULL
161 5bfac5a3 Florent Chuffart
162 6b5a528c Florent Chuffart
		# Initialize df
163 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164 6b5a528c Florent Chuffart
		print(df)
165 5bfac5a3 Florent Chuffart
166 6b5a528c Florent Chuffart
		# Adding elements to df
167 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168 6b5a528c Florent Chuffart
		print(df)
169 6b5a528c Florent Chuffart
})
170 6b5a528c Florent Chuffart
171 6b5a528c Florent Chuffart
172 6b5a528c Florent Chuffart
sign_from_strand = function(
173 55c1cdff Florent Chuffart
### Get the sign of strand
174 6b5a528c Florent Chuffart
strands) {
175 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
177 6b5a528c Florent Chuffart
}
178 6b5a528c Florent Chuffart
179 6b5a528c Florent Chuffart
flat_reads = function(
180 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
181 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
182 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
183 6b5a528c Florent Chuffart
) {
184 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189 6b5a528c Florent Chuffart
}
190 6b5a528c Florent Chuffart
191 6b5a528c Florent Chuffart
192 6b5a528c Florent Chuffart
193 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
194 6b5a528c Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
196 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
197 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
198 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
199 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
200 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
201 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
202 6b5a528c Florent Chuffart
) {
203 6b5a528c Florent Chuffart
  if (x_min < 0) {
204 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (-x_max - nuc_width/2) & tf_outputs$center <  (-x_min + nuc_width/2),]
205 6b5a528c Florent Chuffart
	} else {
206 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (x_min - nuc_width/2) & tf_outputs$center < (x_max + nuc_width/2),]
207 6b5a528c Florent Chuffart
  }
208 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210 55c1cdff Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]
211 55c1cdff Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]
212 6b5a528c Florent Chuffart
  i = 1
213 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
214 55c1cdff Florent Chuffart
    lb = tf_outputs[i,]$low
215 6b5a528c Florent Chuffart
    ub = tf_outputs[i,]$up
216 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217 6b5a528c Florent Chuffart
    i = i+1
218 6b5a528c Florent Chuffart
  }
219 6b5a528c Florent Chuffart
  return(tf_outputs)
220 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
221 6b5a528c Florent Chuffart
}
222 6b5a528c Florent Chuffart
223 6b5a528c Florent Chuffart
224 6b5a528c Florent Chuffart
225 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
226 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
228 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
229 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
230 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
231 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
232 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
233 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
234 b8a95426 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
235 6b5a528c Florent Chuffart
) {
236 6b5a528c Florent Chuffart
	if (only_f) {
237 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
238 6b5a528c Florent Chuffart
	} else if (only_r) {
239 5bfac5a3 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
240 6b5a528c Florent Chuffart
	} else {
241 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
242 6b5a528c Florent Chuffart
	}
243 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
244 b8a95426 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
245 55c1cdff Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
246 b8a95426 Florent Chuffart
  }
247 6b5a528c Florent Chuffart
	return(inputs)
248 6b5a528c Florent Chuffart
### Returns filtred inputs.
249 6b5a528c Florent Chuffart
}
250 6b5a528c Florent Chuffart
251 6b5a528c Florent Chuffart
252 6b5a528c Florent Chuffart
253 6b5a528c Florent Chuffart
get_comp_strand = function(
254 6b5a528c Florent Chuffart
### Compute the complementatry strand.
255 6b5a528c Florent Chuffart
strand ##<< The original strand.
256 6b5a528c Florent Chuffart
) {
257 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
258 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
259 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
260 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
261 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
262 6b5a528c Florent Chuffart
	})
263 6b5a528c Florent Chuffart
### Returns the complementatry strand.
264 6b5a528c Florent Chuffart
}
265 6b5a528c Florent Chuffart
266 6b5a528c Florent Chuffart
267 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
268 6b5a528c Florent Chuffart
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270 729c934e Florent Chuffart
lod_thres=20, ##<< Log likelihood ration threshold.
271 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272 6b5a528c Florent Chuffart
){
273 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
274 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
275 6b5a528c Florent Chuffart
			return(TRUE)
276 6b5a528c Florent Chuffart
		}
277 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
278 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
279 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
280 6b5a528c Florent Chuffart
	        return(FALSE)
281 6b5a528c Florent Chuffart
	      }
282 6b5a528c Florent Chuffart
	  	}
283 6b5a528c Florent Chuffart
	  }
284 6b5a528c Florent Chuffart
	  return(TRUE)
285 6b5a528c Florent Chuffart
	}
286 6b5a528c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
287 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
288 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
289 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
290 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
291 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
292 6b5a528c Florent Chuffart
				# print(new_cluster)
293 6b5a528c Florent Chuffart
		  }
294 6b5a528c Florent Chuffart
		} else {
295 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
296 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
297 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
298 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
299 6b5a528c Florent Chuffart
			}
300 6b5a528c Florent Chuffart
		}
301 6b5a528c Florent Chuffart
		return(clusters)
302 6b5a528c Florent Chuffart
	}
303 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
304 6b5a528c Florent Chuffart
	lod_scores = c()
305 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
306 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
307 6b5a528c Florent Chuffart
  # compute clusters
308 6b5a528c Florent Chuffart
  clusters = list()
309 6b5a528c Florent Chuffart
  cluster_contents = list()
310 6b5a528c Florent Chuffart
  # Init reader
311 6b5a528c Florent Chuffart
  indexes = c()
312 6b5a528c Florent Chuffart
  track_readers = c()
313 6b5a528c Florent Chuffart
  current_nuc = NULL
314 55c1cdff Florent Chuffart
	lod_score = lod_thres + 1
315 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
316 6b5a528c Florent Chuffart
  tf_outs = list()
317 6b5a528c Florent Chuffart
	i = 1
318 6b5a528c Florent Chuffart
  for (sample in samples) {
319 6b5a528c Florent Chuffart
		# print(sample$roi$chr)
320 6b5a528c Florent Chuffart
		# print(min_nuc_center)
321 6b5a528c Florent Chuffart
		# print(max_nuc_center)
322 6b5a528c Florent Chuffart
		# print(sample$outputs)
323 6b5a528c Florent Chuffart
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
324 6b5a528c Florent Chuffart
		# print(tf_outs[[i]])
325 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
326 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
327 6b5a528c Florent Chuffart
    indexes[i] = 1
328 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
329 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
330 6b5a528c Florent Chuffart
	  } else {
331 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
332 6b5a528c Florent Chuffart
		}
333 5bfac5a3 Florent Chuffart
		i = i+1
334 6b5a528c Florent Chuffart
  }
335 6b5a528c Florent Chuffart
	# print(track_readers)
336 6b5a528c Florent Chuffart
  new_cluster = NULL
337 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
338 6b5a528c Florent Chuffart
  nuc_from_track = c()
339 6b5a528c Florent Chuffart
  for (i in 1:length(tf_outs)){
340 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
341 6b5a528c Florent Chuffart
  }
342 6b5a528c Florent Chuffart
  # Start clustering
343 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
344 6b5a528c Florent Chuffart
    new_center = min(track_readers)
345 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
346 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
347 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
348 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
349 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
350 6b5a528c Florent Chuffart
		new_nuc$track = current_track
351 5bfac5a3 Florent Chuffart
352 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
353 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
354 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
355 6b5a528c Florent Chuffart
356 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
357 6b5a528c Florent Chuffart
358 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
359 6b5a528c Florent Chuffart
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
360 6b5a528c Florent Chuffart
			lod_scores = c(lod_scores,lod_score)
361 6b5a528c Florent Chuffart
		}
362 6b5a528c Florent Chuffart
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363 6b5a528c Florent Chuffart
		if (is.na(lod_score)) {
364 55c1cdff Florent Chuffart
			lod_score = lod_thres + 1
365 6b5a528c Florent Chuffart
		}
366 729c934e Florent Chuffart
		# Store lod_score
367 5bfac5a3 Florent Chuffart
		new_nuc$lod_score = lod_score
368 729c934e Florent Chuffart
	  if (lod_score < lod_thres) {
369 6b5a528c Florent Chuffart
      # aggregate to current cluster
370 6b5a528c Florent Chuffart
      #   update bound
371 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
372 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
373 6b5a528c Florent Chuffart
      }
374 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
375 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
376 6b5a528c Florent Chuffart
      }
377 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
378 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
379 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
380 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
381 6b5a528c Florent Chuffart
    } else {
382 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
383 6b5a528c Florent Chuffart
        # store old cluster
384 6b5a528c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
385 6b5a528c Florent Chuffart
			}
386 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
387 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
388 6b5a528c Florent Chuffart
      nuc_from_track = c()
389 6b5a528c Florent Chuffart
      for (i in 1:length(tf_outs)){
390 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
391 6b5a528c Florent Chuffart
      }
392 55c1cdff Florent Chuffart
      # create new cluster
393 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
394 6b5a528c Florent Chuffart
      # update upper bound
395 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
396 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
397 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
398 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
399 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
400 5bfac5a3 Florent Chuffart
401 6b5a528c Florent Chuffart
		}
402 5bfac5a3 Florent Chuffart
403 6b5a528c Florent Chuffart
		current_nuc = new_nuc
404 6b5a528c Florent Chuffart
405 6b5a528c Florent Chuffart
    # update indexes
406 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
407 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
408 6b5a528c Florent Chuffart
      # update track
409 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
410 6b5a528c Florent Chuffart
    } else {
411 6b5a528c Florent Chuffart
      # update track
412 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
413 6b5a528c Florent Chuffart
    }
414 6b5a528c Florent Chuffart
  }
415 6b5a528c Florent Chuffart
  # store last cluster
416 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
417 6b5a528c Florent Chuffart
    # store old cluster
418 6b5a528c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
419 6b5a528c Florent Chuffart
  }
420 6b5a528c Florent Chuffart
	return(list(clusters, lod_scores))
421 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
422 5bfac5a3 Florent Chuffart
}, ex=function(){
423 6b5a528c Florent Chuffart
	# Dealing with a region of interest
424 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
425 6b5a528c Florent Chuffart
	samples = list()
426 6b5a528c Florent Chuffart
	for (i in 1:3) {
427 6b5a528c Florent Chuffart
		# Create TF output
428 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
429 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
430 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
431 6b5a528c Florent Chuffart
		# Generate corresponding reads
432 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
433 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
434 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
435 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
436 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
437 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
438 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
439 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
440 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
441 55c1cdff Florent Chuffart
														 "V3" = strands,
442 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
443 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
444 6b5a528c Florent Chuffart
	}
445 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
446 6b5a528c Florent Chuffart
})
447 6b5a528c Florent Chuffart
448 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
449 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
450 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
451 7646593d Florent Chuffart
roi_index ##<< the index of the roi involved
452 7646593d Florent Chuffart
) {
453 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
454 9011a383 Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", roi_index, "ands current strain." ))
455 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
456 7646593d Florent Chuffart
	} else {
457 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
458 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
459 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
460 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
461 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
462 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
463 7646593d Florent Chuffart
			tmp_nuc_as_list[["roi_index"]] = roi_index
464 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
465 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
466 7646593d Florent Chuffart
			all_original_reads = c()
467 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
468 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
469 7646593d Florent Chuffart
			}
470 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
471 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
472 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
473 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
474 7646593d Florent Chuffart
			} else {
475 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = NA
476 5bfac5a3 Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = NA
477 7646593d Florent Chuffart
			}
478 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
479 7646593d Florent Chuffart
    })
480 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
481 7646593d Florent Chuffart
	}
482 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
483 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
484 7646593d Florent Chuffart
}
485 6b5a528c Florent Chuffart
486 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
487 6b5a528c Florent Chuffart
### This function aligns nucs between two strains for a given genome region.
488 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
489 6b5a528c Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
490 6b5a528c Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
491 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
492 729c934e Florent Chuffart
lod_thres=100, ##<< LOD cut off.
493 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
494 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
495 6b5a528c Florent Chuffart
) {
496 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
497 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
498 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
499 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
500 6b5a528c Florent Chuffart
	}
501 6b5a528c Florent Chuffart
	common_nuc = NULL
502 6b5a528c Florent Chuffart
	lod_scores = c()
503 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
504 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
505 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
506 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
507 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
508 6b5a528c Florent Chuffart
	big_roi = replicates[[1]][[1]]$roi
509 a0b91fee Florent Chuffart
  orig_big_roi = replicates[[1]][[1]]$orig_roi
510 6b5a528c Florent Chuffart
	if(big_roi$end - big_roi$begin < 0) {
511 6b5a528c Florent Chuffart
		tmp_begin = big_roi$begin
512 6b5a528c Florent Chuffart
		big_roi$begin =  big_roi$end
513 6b5a528c Florent Chuffart
		big_roi$end =  tmp_begin
514 6b5a528c Florent Chuffart
	}
515 6b5a528c Florent Chuffart
	# GO!
516 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
517 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
518 6b5a528c Florent Chuffart
	}
519 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
520 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
521 6b5a528c Florent Chuffart
  }
522 1d833b97 Florent Chuffart
  # foo <<- wp_nucs_strain_ref1
523 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
524 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
525 6b5a528c Florent Chuffart
	# dealing with matching_nas
526 6b5a528c Florent Chuffart
	lws = c()
527 6b5a528c Florent Chuffart
	ups = c()
528 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
529 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
530 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
531 6b5a528c Florent Chuffart
	}
532 6b5a528c Florent Chuffart
533 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
534 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
535 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
536 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
537 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
538 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
539 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
540 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
541 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
542 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
543 55c1cdff Florent Chuffart
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi=orig_big_roi, config=config)
544 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
545 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
546 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
547 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
548 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
549 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
550 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
551 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
552 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
553 55c1cdff Florent Chuffart
						if (!is.null(translate_roi(nuc_strain_ref2_to_roi, strain_ref1, big_roi=orig_big_roi, config=config)) &
554 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
555 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
556 6b5a528c Florent Chuffart
							SKIP = FALSE
557 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
558 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
559 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
560 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
561 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
562 6b5a528c Florent Chuffart
									SKIP = TRUE
563 6b5a528c Florent Chuffart
								}
564 6b5a528c Florent Chuffart
							}
565 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
566 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
567 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
568 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
569 6b5a528c Florent Chuffart
									SKIP = TRUE
570 6b5a528c Florent Chuffart
								}
571 6b5a528c Florent Chuffart
							}
572 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
573 6b5a528c Florent Chuffart
							if (!SKIP) {
574 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
575 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
576 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
577 6b5a528c Florent Chuffart
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
578 6b5a528c Florent Chuffart
								lod_scores = c(lod_scores, lod_score)
579 5bfac5a3 Florent Chuffart
								# Filtering on LOD Score
580 729c934e Florent Chuffart
								if (lod_score < lod_thres) {
581 6b5a528c Florent Chuffart
									tmp_nuc = list()
582 6b5a528c Florent Chuffart
									# strain_ref1
583 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
584 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
585 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
586 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
587 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
588 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
589 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
590 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
591 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
592 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
593 6b5a528c Florent Chuffart
									# strain_ref2
594 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
595 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
596 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
597 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
598 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
599 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
600 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
601 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
602 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
603 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
604 6b5a528c Florent Chuffart
									# common
605 6b5a528c Florent Chuffart
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
606 6b5a528c Florent Chuffart
									# print(tmp_nuc)
607 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
608 6b5a528c Florent Chuffart
								}
609 6b5a528c Florent Chuffart
							}
610 6b5a528c Florent Chuffart
						}
611 5bfac5a3 Florent Chuffart
					}
612 6b5a528c Florent Chuffart
				} else {
613 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
614 6b5a528c Florent Chuffart
			  }
615 6b5a528c Florent Chuffart
		  }
616 6b5a528c Florent Chuffart
		}
617 5bfac5a3 Florent Chuffart
618 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
619 6b5a528c Florent Chuffart
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
620 5bfac5a3 Florent Chuffart
			to_remove_list = c()
621 5bfac5a3 Florent Chuffart
			for (i in 1:length(index_redundant)) {
622 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
623 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
624 6b5a528c Florent Chuffart
				}	 else {
625 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
626 5bfac5a3 Florent Chuffart
			  }
627 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
628 6b5a528c Florent Chuffart
			}
629 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
630 6b5a528c Florent Chuffart
		}
631 5bfac5a3 Florent Chuffart
632 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
633 8c87ee64 Florent Chuffart
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
634 5bfac5a3 Florent Chuffart
			to_remove_list = c()
635 5bfac5a3 Florent Chuffart
			for (i in 1:length(index_redundant)) {
636 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
637 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
638 6b5a528c Florent Chuffart
				}	 else {
639 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
640 5bfac5a3 Florent Chuffart
			  }
641 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
642 6b5a528c Florent Chuffart
			}
643 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
644 6b5a528c Florent Chuffart
		}
645 5bfac5a3 Florent Chuffart
646 6b5a528c Florent Chuffart
		return(list(common_nuc, lod_scores))
647 6b5a528c Florent Chuffart
	} else {
648 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
649 6b5a528c Florent Chuffart
		return(NULL)
650 6b5a528c Florent Chuffart
	}
651 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
652 5bfac5a3 Florent Chuffart
}, ex=function(){
653 1d833b97 Florent Chuffart
654 1d833b97 Florent Chuffart
    # Define new translate_roi function...
655 1d833b97 Florent Chuffart
    translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
656 1d833b97 Florent Chuffart
      return(roi)
657 1d833b97 Florent Chuffart
    }
658 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
659 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", as.environment("package:nucleominer"))
660 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", getNamespace("nucleominer"))
661 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, "package:nucleominer")
662 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, getNamespace("nucleominer"))
663 1d833b97 Florent Chuffart
    lockBinding("translate_roi", getNamespace("nucleominer"))
664 55c1cdff Florent Chuffart
    lockBinding("translate_roi", as.environment("package:nucleominer"))
665 1d833b97 Florent Chuffart
666 6b5a528c Florent Chuffart
	# Dealing with a region of interest
667 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
668 6b5a528c Florent Chuffart
	roi2 = translate_roi(roi, roi$strain_ref1)
669 6b5a528c Florent Chuffart
	replicates = list()
670 6b5a528c Florent Chuffart
	for (j in 1:2) {
671 6b5a528c Florent Chuffart
		samples = list()
672 6b5a528c Florent Chuffart
		for (i in 1:3) {
673 6b5a528c Florent Chuffart
			# Create TF output
674 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
675 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
676 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
677 6b5a528c Florent Chuffart
			# Generate corresponding reads
678 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
679 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
680 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
681 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
682 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
683 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
684 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
685 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
686 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
687 55c1cdff Florent Chuffart
															 "V3" = strands,
688 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
689 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
690 6b5a528c Florent Chuffart
		}
691 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
692 6b5a528c Florent Chuffart
	}
693 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
694 6b5a528c Florent Chuffart
})
695 6b5a528c Florent Chuffart
696 6b5a528c Florent Chuffart
697 6b5a528c Florent Chuffart
698 6b5a528c Florent Chuffart
699 6b5a528c Florent Chuffart
700 6b5a528c Florent Chuffart
701 6b5a528c Florent Chuffart
702 6b5a528c Florent Chuffart
703 6b5a528c Florent Chuffart
704 6b5a528c Florent Chuffart
705 6b5a528c Florent Chuffart
706 6b5a528c Florent Chuffart
707 6b5a528c Florent Chuffart
708 6b5a528c Florent Chuffart
709 6b5a528c Florent Chuffart
710 6b5a528c Florent Chuffart
711 6b5a528c Florent Chuffart
712 6b5a528c Florent Chuffart
713 6b5a528c Florent Chuffart
714 6b5a528c Florent Chuffart
715 6b5a528c Florent Chuffart
716 6b5a528c Florent Chuffart
717 6b5a528c Florent Chuffart
718 6b5a528c Florent Chuffart
719 6b5a528c Florent Chuffart
720 6b5a528c Florent Chuffart
721 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
722 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
723 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
724 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
725 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
726 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
727 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
728 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
729 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
730 6b5a528c Florent Chuffart
) {
731 6b5a528c Florent Chuffart
	samples=list()
732 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
733 6b5a528c Florent Chuffart
	for (i in samples_ids) {
734 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
735 a0b91fee Florent Chuffart
    sample$orig_roi = roi
736 8e9facd8 Florent Chuffart
    sample$roi = translate_roi(roi, sample$strain, config = config)
737 6b5a528c Florent Chuffart
		if (get_genome) {
738 6b5a528c Florent Chuffart
			# Get Genome
739 5bfac5a3 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
740 6b5a528c Florent Chuffart
		}
741 6b5a528c Florent Chuffart
		# Get inputs
742 55c1cdff Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
743 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
744 6b5a528c Florent Chuffart
		if (!only_fetch) {
745 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
746 6b5a528c Florent Chuffart
	  }
747 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
748 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
749 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
750 6b5a528c Florent Chuffart
			if (!only_fetch) {
751 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
752 6b5a528c Florent Chuffart
  		}
753 6b5a528c Florent Chuffart
		}
754 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
755 6b5a528c Florent Chuffart
	}
756 6b5a528c Florent Chuffart
  return(samples)
757 6b5a528c Florent Chuffart
}
758 6b5a528c Florent Chuffart
759 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
760 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
761 55c1cdff Florent Chuffart
region1, ##<< Original regions.
762 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
763 6b5a528c Florent Chuffart
) {
764 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
765 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
766 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
767 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
768 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
769 6b5a528c Florent Chuffart
      ret = c()
770 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
771 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
772 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
773 6b5a528c Florent Chuffart
          # remove cur_fuzzy
774 6b5a528c Florent Chuffart
          ret = c()
775 6b5a528c Florent Chuffart
          break
776 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
777 55c1cdff Florent Chuffart
          # crop fuzzy
778 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
779 6b5a528c Florent Chuffart
          ret = cur_fuzzy
780 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
781 55c1cdff Florent Chuffart
          # crop fuzzy
782 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
783 6b5a528c Florent Chuffart
          ret = cur_fuzzy
784 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
785 6b5a528c Florent Chuffart
          # split fuzzy
786 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
787 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
788 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
789 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
790 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
791 6b5a528c Florent Chuffart
          # print(ret)
792 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
793 6b5a528c Florent Chuffart
          break
794 6b5a528c Florent Chuffart
        } else {
795 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
796 55c1cdff Florent Chuffart
        }
797 6b5a528c Florent Chuffart
      }
798 6b5a528c Florent Chuffart
      return(ret)
799 6b5a528c Florent Chuffart
    } else {
800 6b5a528c Florent Chuffart
      return(cur_fuzzy)
801 6b5a528c Florent Chuffart
    }
802 6b5a528c Florent Chuffart
  })
803 6b5a528c Florent Chuffart
  }
804 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
805 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
806 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
807 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
808 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
809 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
810 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
811 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
812 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
813 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
814 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
815 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
816 6b5a528c Florent Chuffart
}
817 6b5a528c Florent Chuffart
818 6b5a528c Florent Chuffart
union_regions = function(# Aggregate regions that intersect themnselves.
819 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
820 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
821 6b5a528c Florent Chuffart
) {
822 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
823 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
824 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
825 6b5a528c Florent Chuffart
  new_length = 0
826 55c1cdff Florent Chuffart
827 6b5a528c Florent Chuffart
  while (old_length != new_length) {
828 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
829 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
830 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
831 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
832 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
833 55c1cdff Florent Chuffart
    }
834 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
835 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
836 6b5a528c Florent Chuffart
    }
837 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
838 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
839 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
840 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
841 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
842 55c1cdff Florent Chuffart
      roi_index = regions$roi_index[vec_beg_1[i]]
843 6b5a528c Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, roi_index=roi_index))
844 6b5a528c Florent Chuffart
      })
845 59ad95ca Florent Chuffart
    union = collapse_regions(union)
846 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
847 6b5a528c Florent Chuffart
    new_length = length(union[,1])
848 6b5a528c Florent Chuffart
    regions = union
849 6b5a528c Florent Chuffart
  }
850 6b5a528c Florent Chuffart
  return(union)
851 6b5a528c Florent Chuffart
}
852 6b5a528c Florent Chuffart
853 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
854 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
855 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
856 0303dbe8 Florent Chuffart
# roi_index, ##<< The region of interest index.
857 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
858 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
859 0303dbe8 Florent Chuffart
# ){
860 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
861 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$roi_index == roi_index,]
862 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
863 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
864 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
865 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
866 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
867 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
868 0303dbe8 Florent Chuffart
# }
869 6b5a528c Florent Chuffart
870 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
871 6b5a528c Florent Chuffart
### This function is an eloborated call to translate_roi.
872 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
873 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
874 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
875 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
876 6b5a528c Florent Chuffart
roi ##<< The region of interest.
877 6b5a528c Florent Chuffart
) {
878 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
879 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
880 5a26597a Florent Chuffart
    big_roi =  roi
881 55c1cdff Florent Chuffart
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config = config, big_roi = big_roi)
882 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
883 5bfac5a3 Florent Chuffart
      return(NULL)
884 5bfac5a3 Florent Chuffart
    } else {
885 5bfac5a3 Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index)))
886 5bfac5a3 Florent Chuffart
    }
887 5bfac5a3 Florent Chuffart
  })
888 5bfac5a3 Florent Chuffart
889 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
890 59ad95ca Florent Chuffart
}
891 59ad95ca Florent Chuffart
892 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
893 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
894 59ad95ca Florent Chuffart
regions ##< a list of regions
895 59ad95ca Florent Chuffart
) {
896 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
897 5bfac5a3 Florent Chuffart
    return(NULL)
898 5bfac5a3 Florent Chuffart
  } else {
899 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
900 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
901 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
902 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
903 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
904 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
905 5bfac5a3 Florent Chuffart
    return(regions)
906 5bfac5a3 Florent Chuffart
  }
907 6b5a528c Florent Chuffart
}
908 6b5a528c Florent Chuffart
909 6b5a528c Florent Chuffart
910 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
911 6b5a528c Florent Chuffart
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
912 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
913 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
914 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
915 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
916 6b5a528c Florent Chuffart
) {
917 8e9facd8 Florent Chuffart
  tr_roi = translate_roi(roi, strain, config = config)
918 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
919 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
920 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
921 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
922 6b5a528c Florent Chuffart
  }
923 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
924 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
925 6b5a528c Florent Chuffart
  }
926 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
927 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
928 6b5a528c Florent Chuffart
  }
929 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
930 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
931 6b5a528c Florent Chuffart
  }
932 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
933 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
934 6b5a528c Florent Chuffart
}
935 6b5a528c Florent Chuffart
936 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
937 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
938 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
939 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
940 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
941 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
942 6b5a528c Florent Chuffart
) {
943 6b5a528c Florent Chuffart
	all_reads = NULL
944 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
945 6b5a528c Florent Chuffart
    if (form == "fuzzy") {
946 cc54c799 Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
947 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
948 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
949 6b5a528c Florent Chuffart
      tmp_res$form = form
950 6b5a528c Florent Chuffart
    } else if (form == "wp") {
951 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
952 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
953 6b5a528c Florent Chuffart
      tmp_res$form = form
954 6b5a528c Florent Chuffart
    } else if (form == "wpfuzzy") {
955 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
956 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
957 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
958 cc54c799 Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
959 5bfac5a3 Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
960 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
961 6b5a528c Florent Chuffart
      tmp_res2$form = "fuzzy"
962 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
963 6b5a528c Florent Chuffart
    }
964 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
965 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
966 55c1cdff Florent Chuffart
		}
967 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
968 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
969 6b5a528c Florent Chuffart
  }
970 5bfac5a3 Florent Chuffart
  return(all_reads)
971 6b5a528c Florent Chuffart
}
972 6b5a528c Florent Chuffart
973 6b5a528c Florent Chuffart
get_design = function(# Build the design for deseq
974 6b5a528c Florent Chuffart
### This function build the design according sample properties.
975 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
976 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
977 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
978 6b5a528c Florent Chuffart
) {
979 6b5a528c Florent Chuffart
  off1 = 0
980 6b5a528c Florent Chuffart
  off2 = 0
981 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
982 6b5a528c Florent Chuffart
	design_rownames = c()
983 6b5a528c Florent Chuffart
	design_manip = c()
984 6b5a528c Florent Chuffart
	design_strain = c()
985 5bfac5a3 Florent Chuffart
  off2index = function(off) {
986 55c1cdff Florent Chuffart
  	switch(toString(off),
987 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
988 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
989 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
990 6b5a528c Florent Chuffart
  		c(1,1,1)
991 5bfac5a3 Florent Chuffart
  		)
992 6b5a528c Florent Chuffart
  }
993 6b5a528c Florent Chuffart
	for (manip in manips) {
994 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
995 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
996 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
997 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
998 6b5a528c Florent Chuffart
		} else {
999 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1000 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1001 6b5a528c Florent Chuffart
			}
1002 6b5a528c Florent Chuffart
		}
1003 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
1004 5bfac5a3 Florent Chuffart
		for (strain in combi) {
1005 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1006 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1007 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1008 6b5a528c Florent Chuffart
		}
1009 6b5a528c Florent Chuffart
	}
1010 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1011 5bfac5a3 Florent Chuffart
	return(snep_design)
1012 6b5a528c Florent Chuffart
}
1013 6b5a528c Florent Chuffart
1014 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1015 6b5a528c Florent Chuffart
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1016 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1017 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1018 6b5a528c Florent Chuffart
res, ##<< Data
1019 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1020 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1021 5bfac5a3 Florent Chuffart
) {
1022 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1023 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1024 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1025 6b5a528c Florent Chuffart
		row.names = cols,
1026 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1027 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1028 6b5a528c Florent Chuffart
		)
1029 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1030 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1031 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1032 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1033 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1034 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1035 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1036 6b5a528c Florent Chuffart
	}
1037 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1038 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1039 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1040 6b5a528c Florent Chuffart
	}
1041 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1042 6b5a528c Florent Chuffart
}
1043 6b5a528c Florent Chuffart
1044 6b5a528c Florent Chuffart
analyse_design = function(# Launch deseq methods.
1045 6b5a528c Florent Chuffart
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1046 6b5a528c Florent Chuffart
snep_design, ##<< The design to considere.
1047 55c1cdff Florent Chuffart
reads ##<< The data to considere.
1048 6b5a528c Florent Chuffart
) {
1049 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1050 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1051 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1052 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1053 6b5a528c Florent Chuffart
1054 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1055 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1056 6b5a528c Florent Chuffart
1057 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1058 5bfac5a3 Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))
1059 6b5a528c Florent Chuffart
}
1060 6b5a528c Florent Chuffart
1061 6b5a528c Florent Chuffart
1062 6b5a528c Florent Chuffart
1063 6b5a528c Florent Chuffart
1064 6b5a528c Florent Chuffart
1065 6b5a528c Florent Chuffart
1066 6b5a528c Florent Chuffart
1067 6b5a528c Florent Chuffart
1068 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1069 55c1cdff Florent Chuffart
### This function uses
1070 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1071 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1072 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1073 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1074 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1075 6b5a528c Florent Chuffart
) {
1076 6b5a528c Florent Chuffart
  # PRETREAT
1077 6b5a528c Florent Chuffart
  d = get_design(marker, combi, all_samples)
1078 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1079 6b5a528c Florent Chuffart
  # RUN ANALYSE
1080 6b5a528c Florent Chuffart
  tmp_analyse = analyse_design(d, reads)
1081 6b5a528c Florent Chuffart
  # RESULTS
1082 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1083 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1084 6b5a528c Florent Chuffart
  k = names(fit1)
1085 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1086 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1087 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1088 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1089 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1090 6b5a528c Florent Chuffart
  print(snep_design)
1091 6b5a528c Florent Chuffart
	fdr = 0.0001
1092 55c1cdff Florent Chuffart
	thres = FDR(reads$pvalsGLM, fdr)
1093 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1094 6b5a528c Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1095 6b5a528c Florent Chuffart
  return(reads)
1096 5bfac5a3 Florent Chuffart
  },  ex=function(){
1097 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1098 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1099 6b5a528c Florent Chuffart
    form = "wpfuzzy" # "wp" | "fuzzy" | "wpfuzzy"
1100 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1101 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1102 6b5a528c Florent Chuffart
})
1103 6b5a528c Florent Chuffart
1104 6b5a528c Florent Chuffart
1105 6b5a528c Florent Chuffart
1106 6b5a528c Florent Chuffart
1107 6b5a528c Florent Chuffart
1108 6b5a528c Florent Chuffart
1109 6b5a528c Florent Chuffart
1110 6b5a528c Florent Chuffart
1111 6b5a528c Florent Chuffart
1112 6b5a528c Florent Chuffart
1113 6b5a528c Florent Chuffart
1114 6b5a528c Florent Chuffart
1115 6b5a528c Florent Chuffart
1116 6b5a528c Florent Chuffart
1117 6b5a528c Florent Chuffart
1118 6b5a528c Florent Chuffart
1119 6b5a528c Florent Chuffart
1120 6b5a528c Florent Chuffart
1121 6b5a528c Florent Chuffart
1122 6b5a528c Florent Chuffart
1123 6b5a528c Florent Chuffart
1124 6b5a528c Florent Chuffart
1125 6b5a528c Florent Chuffart
1126 6b5a528c Florent Chuffart
1127 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1128 1d833b97 Florent Chuffart
### Util to convert Roman to Arabic
1129 1d833b97 Florent Chuffart
){list(
1130 1d833b97 Florent Chuffart
  "I" = 1,
1131 1d833b97 Florent Chuffart
  "II" = 2,
1132 1d833b97 Florent Chuffart
  "III" = 3,
1133 1d833b97 Florent Chuffart
  "IV" = 4,
1134 1d833b97 Florent Chuffart
  "V" = 5,
1135 1d833b97 Florent Chuffart
  "VI" = 6,
1136 1d833b97 Florent Chuffart
  "VII" = 7,
1137 1d833b97 Florent Chuffart
  "VIII" = 8,
1138 1d833b97 Florent Chuffart
  "IX" = 9,
1139 1d833b97 Florent Chuffart
  "X" = 10,
1140 1d833b97 Florent Chuffart
  "XI" = 11,
1141 1d833b97 Florent Chuffart
  "XII" = 12,
1142 1d833b97 Florent Chuffart
  "XIII" = 13,
1143 1d833b97 Florent Chuffart
  "XIV" = 14,
1144 1d833b97 Florent Chuffart
  "XV" = 15,
1145 1d833b97 Florent Chuffart
  "XVI" = 16,
1146 1d833b97 Florent Chuffart
  "XVII" = 17,
1147 1d833b97 Florent Chuffart
  "XVIII" = 18,
1148 1d833b97 Florent Chuffart
  "XIX" = 19,
1149 1d833b97 Florent Chuffart
  "XX" = 20
1150 1d833b97 Florent Chuffart
)}
1151 1d833b97 Florent Chuffart
1152 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1153 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1154 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1155 1d833b97 Florent Chuffart
) {
1156 1d833b97 Florent Chuffart
	ret = list()
1157 1d833b97 Florent Chuffart
	for (name in names(l)) {
1158 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1159 1d833b97 Florent Chuffart
	}
1160 1d833b97 Florent Chuffart
	ret
1161 1d833b97 Florent Chuffart
### The switched pairlist.
1162 1d833b97 Florent Chuffart
}, ex=function(){
1163 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1164 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1165 1d833b97 Florent Chuffart
})
1166 1d833b97 Florent Chuffart
1167 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1168 1d833b97 Florent Chuffart
### Util to convert Arabicto Roman
1169 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1170 1d833b97 Florent Chuffart
1171 1d833b97 Florent Chuffart
1172 1d833b97 Florent Chuffart
1173 1d833b97 Florent Chuffart
1174 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1175 5bfac5a3 Florent Chuffart
### This fonction allow to acces to a specific part of the c2c file.
1176 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1177 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1178 5bfac5a3 Florent Chuffart
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1179 5bfac5a3 Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1180 5bfac5a3 Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1181 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1182 1d833b97 Florent Chuffart
) {
1183 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1184 1d833b97 Florent Chuffart
	# Launch c2c file
1185 1d833b97 Florent Chuffart
	if (reverse) {
1186 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1187 1d833b97 Florent Chuffart
	} else {
1188 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1189 1d833b97 Florent Chuffart
	}
1190 5bfac5a3 Florent Chuffart
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1191 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1192 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1193 1d833b97 Florent Chuffart
	# Reverse
1194 1d833b97 Florent Chuffart
	if (reverse) {
1195 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1196 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1197 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1198 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1199 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1200 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1201 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1202 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1203 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1204 1d833b97 Florent Chuffart
	}
1205 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1206 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1207 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1208 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1209 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1210 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1211 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1212 5bfac5a3 Florent Chuffart
  	}
1213 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1214 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1215 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1216 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1217 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1218 5bfac5a3 Florent Chuffart
    }
1219 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1220 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1221 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1222 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1223 5bfac5a3 Florent Chuffart
    }
1224 5bfac5a3 Florent Chuffart
  }
1225 5bfac5a3 Florent Chuffart
  return(c2c)
1226 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1227 5bfac5a3 Florent Chuffart
}
1228 5bfac5a3 Florent Chuffart
1229 5bfac5a3 Florent Chuffart
translate_roi = structure(function(# Translate coords of a genome region.
1230 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1231 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1232 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1233 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1234 5bfac5a3 Florent Chuffart
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1235 5bfac5a3 Florent Chuffart
) {
1236 5bfac5a3 Florent Chuffart
	strain1 = roi$strain_ref
1237 5bfac5a3 Florent Chuffart
	if (strain1 == strain2) {
1238 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1239 5bfac5a3 Florent Chuffart
		return(roi)
1240 5bfac5a3 Florent Chuffart
	}
1241 5bfac5a3 Florent Chuffart
1242 5bfac5a3 Florent Chuffart
	# Extract c2c file
1243 1d833b97 Florent Chuffart
	if (!is.null(big_roi)) {
1244 5bfac5a3 Florent Chuffart
  	# Dealing with big_roi
1245 62d69d53 Florent Chuffart
		if (roi$strain_ref != big_roi$strain_ref) {
1246 62d69d53 Florent Chuffart
      big_roi = translate_roi(big_roi, roi$strain_ref, config=config)
1247 62d69d53 Florent Chuffart
    }
1248 62d69d53 Florent Chuffart
    if (big_roi$end < big_roi$begin) {
1249 62d69d53 Florent Chuffart
      tmp_var = big_roi$begin
1250 55c1cdff Florent Chuffart
      big_roi$begin = big_roi$end
1251 55c1cdff Florent Chuffart
      big_roi$end = tmp_var
1252 5bfac5a3 Florent Chuffart
      big_roi$length = big_roi$end - big_roi$begin + 1
1253 55c1cdff Florent Chuffart
    }
1254 ded736c8 Florent Chuffart
    if (big_roi$chr!=roi$chr | roi$end > big_roi$end | roi$end < big_roi$begin | roi$begin > big_roi$end | roi$begin < big_roi$begin) {
1255 ded736c8 Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_roi.")
1256 ded736c8 Florent Chuffart
      return(NULL)
1257 ded736c8 Florent Chuffart
    }
1258 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_roi$chr, big_roi$begin, big_roi$end, config=config)
1259 5bfac5a3 Florent Chuffart
  } else {
1260 5bfac5a3 Florent Chuffart
    # No big_roi
1261 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1262 5bfac5a3 Florent Chuffart
  }
1263 5bfac5a3 Florent Chuffart
  
1264 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1265 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1266 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1267 1d833b97 Florent Chuffart
  end_1 = roi$end
1268 5bfac5a3 Florent Chuffart
  if (reverse) {
1269 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1270 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1271 1d833b97 Florent Chuffart
	} else {
1272 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1273 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1274 1d833b97 Florent Chuffart
	}
1275 5bfac5a3 Florent Chuffart
1276 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1277 1d833b97 Florent Chuffart
	{
1278 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1279 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1280 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1281 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1282 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1283 54cc8283 Florent Chuffart
      } else {
1284 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1285 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1286 54cc8283 Florent Chuffart
      }
1287 1d833b97 Florent Chuffart
			if (reverse) {
1288 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1289 1d833b97 Florent Chuffart
			} else {
1290 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1291 1d833b97 Florent Chuffart
			}
1292 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1293 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1294 1d833b97 Florent Chuffart
					return(NULL)
1295 5bfac5a3 Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
1296 55c1cdff Florent Chuffart
				} else {
1297 1d833b97 Florent Chuffart
					print(tmptransfostart)
1298 1d833b97 Florent Chuffart
					print(tmptransfostop)
1299 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1300 1d833b97 Florent Chuffart
				}
1301 1d833b97 Florent Chuffart
			}
1302 55c1cdff Florent Chuffart
		}
1303 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1304 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1305 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1306 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1307 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1308 54cc8283 Florent Chuffart
      } else {
1309 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1310 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1311 54cc8283 Florent Chuffart
      }
1312 1d833b97 Florent Chuffart
			if (reverse) {
1313 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1314 1d833b97 Florent Chuffart
			} else {
1315 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1316 1d833b97 Florent Chuffart
			}
1317 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1318 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1319 1d833b97 Florent Chuffart
					return(NULL)
1320 5bfac5a3 Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
1321 1d833b97 Florent Chuffart
				} else {
1322 1d833b97 Florent Chuffart
					print(tmptransfostart)
1323 1d833b97 Florent Chuffart
					print(tmptransfostop)
1324 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1325 55c1cdff Florent Chuffart
				}
1326 55c1cdff Florent Chuffart
			}
1327 1d833b97 Florent Chuffart
		}
1328 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1329 1d833b97 Florent Chuffart
			# print("many start")
1330 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1331 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1332 1d833b97 Florent Chuffart
				print(tmptransfostart)
1333 1d833b97 Florent Chuffart
				print(tmptransfostop)
1334 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1335 1d833b97 Florent Chuffart
			}
1336 1d833b97 Florent Chuffart
		}
1337 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1338 1d833b97 Florent Chuffart
			# print("many stop")
1339 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1340 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1341 1d833b97 Florent Chuffart
				print(tmptransfostart)
1342 1d833b97 Florent Chuffart
				print(tmptransfostop)
1343 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1344 1d833b97 Florent Chuffart
			}
1345 5bfac5a3 Florent Chuffart
		}
1346 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1347 1d833b97 Florent Chuffart
			print(tmptransfostart)
1348 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1349 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1350 1d833b97 Florent Chuffart
		}
1351 55c1cdff Florent Chuffart
	}
1352 5bfac5a3 Florent Chuffart
  
1353 1d833b97 Florent Chuffart
  # Deal with strand
1354 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1355 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1356 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1357 1d833b97 Florent Chuffart
  } else {
1358 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1359 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1360 1d833b97 Florent Chuffart
  }
1361 1d833b97 Florent Chuffart
	# Build returned roi
1362 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1363 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1364 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1365 1d833b97 Florent Chuffart
	} else {
1366 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1367 1d833b97 Florent Chuffart
	}
1368 1d833b97 Florent Chuffart
  roi$begin = begin_2
1369 1d833b97 Florent Chuffart
  roi$end = end_2
1370 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1371 5bfac5a3 Florent Chuffart
		roi$length = 1
1372 5bfac5a3 Florent Chuffart
	} else {
1373 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1374 1d833b97 Florent Chuffart
	}
1375 55c1cdff Florent Chuffart
  return(roi)
1376 1d833b97 Florent Chuffart
}, ex=function(){
1377 1d833b97 Florent Chuffart
	# Define new translate_roi function...
1378 1d833b97 Florent Chuffart
	translate_roi = function(roi, strain2, config) {
1379 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1380 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1381 1d833b97 Florent Chuffart
			return(roi)
1382 1d833b97 Florent Chuffart
		} else {
1383 5bfac5a3 Florent Chuffart
		  stop("Here is my new translate_roi function...")
1384 5bfac5a3 Florent Chuffart
		}
1385 1d833b97 Florent Chuffart
	}
1386 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1387 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", as.environment("package:nm"))
1388 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", getNamespace("nm"))
1389 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, "package:nm")
1390 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, getNamespace("nm"))
1391 1d833b97 Florent Chuffart
	# lockBinding("translate_roi", getNamespace("nm"))
1392 5bfac5a3 Florent Chuffart
	# lockBinding("translate_roi", as.environment("package:nm"))
1393 1d833b97 Florent Chuffart
})
1394 1d833b97 Florent Chuffart
1395 1d833b97 Florent Chuffart
1396 5bfac5a3 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1397 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1398 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1399 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1400 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1401 5bfac5a3 Florent Chuffart
) {
1402 1d833b97 Florent Chuffart
1403 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1404 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1405 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1406 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1407 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1408 5bfac5a3 Florent Chuffart
    } else {
1409 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1410 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1411 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1412 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1413 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1414 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1415 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1416 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1417 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1418 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1419 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1420 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1421 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1422 5bfac5a3 Florent Chuffart
      }
1423 5bfac5a3 Florent Chuffart
      return(NULL)
1424 5bfac5a3 Florent Chuffart
    }
1425 5bfac5a3 Florent Chuffart
  }
1426 1d833b97 Florent Chuffart
1427 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1428 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1429 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1430 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1431 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1432 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1433 5bfac5a3 Florent Chuffart
  	}
1434 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1435 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1436 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1437 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1438 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1439 5bfac5a3 Florent Chuffart
  	}
1440 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1441 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1442 5bfac5a3 Florent Chuffart
  	}
1443 5bfac5a3 Florent Chuffart
  }
1444 1d833b97 Florent Chuffart
1445 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1446 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1447 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1448 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1449 5bfac5a3 Florent Chuffart
      if (!ok) {
1450 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1451 5bfac5a3 Florent Chuffart
      }
1452 5bfac5a3 Florent Chuffart
      return (ok)
1453 5bfac5a3 Florent Chuffart
    }
1454 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1455 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1456 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1457 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1458 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1459 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1460 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1461 5bfac5a3 Florent Chuffart
        return(roi)
1462 5bfac5a3 Florent Chuffart
      })
1463 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1464 5bfac5a3 Florent Chuffart
    }
1465 5bfac5a3 Florent Chuffart
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1466 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1467 5bfac5a3 Florent Chuffart
    STOP = FALSE
1468 5bfac5a3 Florent Chuffart
    nb_round = 0
1469 5bfac5a3 Florent Chuffart
    while(!STOP) {
1470 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1471 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1472 5bfac5a3 Florent Chuffart
      fooby = translate_rois(foo, "BY", config=config)
1473 5bfac5a3 Florent Chuffart
      fooyjm = translate_rois(foo, "YJM", config=config)
1474 5bfac5a3 Florent Chuffart
      fooyjmby = translate_rois(fooyjm, "BY", config=config)
1475 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1476 5bfac5a3 Florent Chuffart
        print("case 1")
1477 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1478 5bfac5a3 Florent Chuffart
    		next
1479 5bfac5a3 Florent Chuffart
      }
1480 5bfac5a3 Florent Chuffart
      foorm = translate_rois(foo, "RM", config=config)
1481 5bfac5a3 Florent Chuffart
      foormby = translate_rois(foorm, "BY", config=config)
1482 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1483 5bfac5a3 Florent Chuffart
        print("case 2")
1484 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1485 5bfac5a3 Florent Chuffart
    		next
1486 5bfac5a3 Florent Chuffart
      }
1487 5bfac5a3 Florent Chuffart
      fooyjmrm = translate_rois(fooyjm, "RM", config=config)
1488 5bfac5a3 Florent Chuffart
      fooyjmrmyjm = translate_rois(fooyjmrm, "YJM", config=config)
1489 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1490 5bfac5a3 Florent Chuffart
        print("case 3")
1491 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1492 5bfac5a3 Florent Chuffart
        next
1493 5bfac5a3 Florent Chuffart
      }
1494 5bfac5a3 Florent Chuffart
      foormyjm = translate_rois(foorm, "YJM", config=config)
1495 5bfac5a3 Florent Chuffart
      foormyjmrm = translate_rois(foormyjm, "RM", config=config)
1496 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1497 5bfac5a3 Florent Chuffart
        print("case 4")
1498 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1499 5bfac5a3 Florent Chuffart
        next
1500 5bfac5a3 Florent Chuffart
      }
1501 5bfac5a3 Florent Chuffart
      foo = translate_rois(foo, "BY", config=config)
1502 5bfac5a3 Florent Chuffart
      STOP = TRUE
1503 5bfac5a3 Florent Chuffart
    }
1504 5bfac5a3 Florent Chuffart
    STOP = FALSE
1505 5bfac5a3 Florent Chuffart
    nb_round = 0
1506 5bfac5a3 Florent Chuffart
    while(!STOP) {
1507 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1508 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1509 5bfac5a3 Florent Chuffart
      fooby = translate_rois(foo, "BY", config=config)
1510 5bfac5a3 Florent Chuffart
      foobyrm = translate_rois(fooby, "RM", config=config)
1511 5bfac5a3 Florent Chuffart
      foobyrmyjm = translate_rois(foobyrm, "YJM", config=config)
1512 5bfac5a3 Florent Chuffart
      foobyrmyjmby = translate_rois(foobyrmyjm, "BY", config=config)
1513 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1514 5bfac5a3 Florent Chuffart
        print("case 1")
1515 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1516 5bfac5a3 Florent Chuffart
      }
1517 5bfac5a3 Florent Chuffart
      fooby = translate_rois(foo, "BY", config=config)
1518 5bfac5a3 Florent Chuffart
      foobyyjm = translate_rois(fooby, "YJM", config=config)
1519 5bfac5a3 Florent Chuffart
      foobyyjmrm = translate_rois(foobyyjm, "RM", config=config)
1520 5bfac5a3 Florent Chuffart
      foobyyjmrmby = translate_rois(foobyyjmrm, "BY", config=config)
1521 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1522 5bfac5a3 Florent Chuffart
        print("case 2")
1523 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1524 5bfac5a3 Florent Chuffart
        next
1525 5bfac5a3 Florent Chuffart
      }
1526 5bfac5a3 Florent Chuffart
      foo = translate_rois(foo, "BY", config=config)
1527 5bfac5a3 Florent Chuffart
      STOP = TRUE
1528 5bfac5a3 Florent Chuffart
    }
1529 5bfac5a3 Florent Chuffart
    print("end")
1530 5bfac5a3 Florent Chuffart
    return(foo)
1531 5bfac5a3 Florent Chuffart
  }
1532 1d833b97 Florent Chuffart
1533 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1534 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1535 5bfac5a3 Florent Chuffart
    # computing diffs
1536 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1537 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1538 1d833b97 Florent Chuffart
    # Filtering
1539 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1540 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1541 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1542 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1543 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1544 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1545 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1546 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1547 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1548 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1549 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1550 5bfac5a3 Florent Chuffart
  		} else {
1551 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1552 1d833b97 Florent Chuffart
  		}
1553 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1554 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1555 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1556 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1557 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1558 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1559 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1560 5bfac5a3 Florent Chuffart
          # print(check)
1561 5bfac5a3 Florent Chuffart
      		roi1 = roi
1562 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1563 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1564 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1565 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1566 5bfac5a3 Florent Chuffart
          }
1567 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1568 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1569 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1570 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1571 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1572 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1573 5bfac5a3 Florent Chuffart
          } else {
1574 5bfac5a3 Florent Chuffart
            check = NULL
1575 5bfac5a3 Florent Chuffart
            roi = NULL
1576 5bfac5a3 Florent Chuffart
          }
1577 5bfac5a3 Florent Chuffart
        }
1578 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1579 1d833b97 Florent Chuffart
  	  }
1580 5bfac5a3 Florent Chuffart
    })
1581 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1582 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1583 1d833b97 Florent Chuffart
  	return(rois)
1584 1d833b97 Florent Chuffart
  }
1585 1d833b97 Florent Chuffart
1586 5bfac5a3 Florent Chuffart
  translate_rois = function(rois, target_strain, config) {
1587 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1588 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1589 5bfac5a3 Florent Chuffart
      tr_roi = translate_roi(roi, target_strain, config=config)  
1590 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1591 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1592 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1593 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1594 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1595 5bfac5a3 Florent Chuffart
      return(tr_roi)
1596 5bfac5a3 Florent Chuffart
    })
1597 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1598 5bfac5a3 Florent Chuffart
    return(tr_rois)
1599 5bfac5a3 Florent Chuffart
  }
1600 729c934e Florent Chuffart
1601 5bfac5a3 Florent Chuffart
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1602 5bfac5a3 Florent Chuffart
  rois = list()
1603 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1604 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1605 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1606 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1607 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1608 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1609 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1610 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1611 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1612 5bfac5a3 Florent Chuffart
    tr_rois_rev = translate_rois(rois_rev, combi[1], config)      
1613 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1614 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1615 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1616 5bfac5a3 Florent Chuffart
  }
1617 5bfac5a3 Florent Chuffart
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1618 5bfac5a3 Florent Chuffart
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1619 5bfac5a3 Florent Chuffart
  tr_reducted_1_rois = translate_rois(reducted_1_rois, "RM", config)  
1620 5bfac5a3 Florent Chuffart
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1621 5bfac5a3 Florent Chuffart
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1622 5bfac5a3 Florent Chuffart
  reducted_rois = translate_rois(reducted_2_rois, "BY", config)  
1623 5bfac5a3 Florent Chuffart
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1624 5bfac5a3 Florent Chuffart
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1625 5bfac5a3 Florent Chuffart
  return (squeezed_rois)
1626 1d833b97 Florent Chuffart
}
1627 1d833b97 Florent Chuffart
1628 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1629 5bfac5a3 Florent Chuffart
### This function...
1630 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1631 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1632 5bfac5a3 Florent Chuffart
) {
1633 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1634 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1635 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1636 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1637 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1638 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1639 5bfac5a3 Florent Chuffart
                                , ]
1640 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1641 5bfac5a3 Florent Chuffart
      print("removing a region")
1642 5bfac5a3 Florent Chuffart
      return(NULL)
1643 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1644 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1645 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1646 5bfac5a3 Florent Chuffart
    } else {
1647 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1648 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1649 5bfac5a3 Florent Chuffart
        print("not the same begin")
1650 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1651 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1652 5bfac5a3 Florent Chuffart
      }
1653 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1654 5bfac5a3 Florent Chuffart
        print("not the same end")
1655 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1656 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1657 5bfac5a3 Florent Chuffart
      }
1658 5bfac5a3 Florent Chuffart
      return(roi1)
1659 5bfac5a3 Florent Chuffart
    }         
1660 5bfac5a3 Florent Chuffart
  })
1661 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1662 5bfac5a3 Florent Chuffart
}
1663 1d833b97 Florent Chuffart
1664 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1665 61914235 Florent Chuffart
### This function loads in memory data corresponding to the given experiments.
1666 61914235 Florent Chuffart
expe, ##<< a list of vector corresponding to vector of replicates.
1667 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1668 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1669 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1670 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1671 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1672 61914235 Florent Chuffart
) {
1673 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1674 61914235 Florent Chuffart
  	samples=list()
1675 61914235 Florent Chuffart
  	for (i in samples_ids) {
1676 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1677 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1678 8e9facd8 Florent Chuffart
      sample$roi = translate_roi(roi, sample$strain, config = config)
1679 61914235 Florent Chuffart
  		if (get_genome) {
1680 61914235 Florent Chuffart
  			# Get Genome
1681 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1682 5bfac5a3 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1683 61914235 Florent Chuffart
  		}
1684 61914235 Florent Chuffart
  		# Get inputs
1685 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1686 55c1cdff Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1687 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
1688 61914235 Florent Chuffart
  		if (!only_fetch) {
1689 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1690 61914235 Florent Chuffart
  	  }
1691 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1692 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1693 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1694 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1695 61914235 Florent Chuffart
  			if (!only_fetch) {
1696 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1697 61914235 Florent Chuffart
    		}
1698 61914235 Florent Chuffart
  		}
1699 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
1700 61914235 Florent Chuffart
  	}
1701 61914235 Florent Chuffart
  	return(samples)
1702 61914235 Florent Chuffart
  }
1703 61914235 Florent Chuffart
	replicates = list()
1704 61914235 Florent Chuffart
	for(samples_ids in expe) {
1705 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1706 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1707 61914235 Florent Chuffart
	}
1708 61914235 Florent Chuffart
	return(replicates)
1709 61914235 Florent Chuffart
  }, ex = function() {
1710 61914235 Florent Chuffart
    # library(rjson)
1711 61914235 Florent Chuffart
    # library(nucleominer)
1712 55c1cdff Florent Chuffart
    #
1713 61914235 Florent Chuffart
    # # Read config file
1714 61914235 Florent Chuffart
    # json_conf_file = "nucleo_miner_config.json"
1715 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1716 61914235 Florent Chuffart
    # # Read sample file
1717 55c1cdff Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1718 61914235 Florent Chuffart
    # # here are the sample ids in a list
1719 61914235 Florent Chuffart
    # expes = list(c(1))
1720 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1721 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1722 61914235 Florent Chuffart
    # # it displays the corverage
1723 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1724 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
1725 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
1726 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
1727 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
1728 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
1729 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
1730 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
1731 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
1732 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
1733 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
1734 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
1735 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
1736 61914235 Florent Chuffart
    #       height = 50)
1737 61914235 Florent Chuffart
  })
1738 61914235 Florent Chuffart
1739 61914235 Florent Chuffart
1740 61914235 Florent Chuffart
1741 61914235 Florent Chuffart
1742 61914235 Florent Chuffart
1743 61914235 Florent Chuffart
1744 61914235 Florent Chuffart
1745 61914235 Florent Chuffart
1746 61914235 Florent Chuffart
1747 61914235 Florent Chuffart
1748 61914235 Florent Chuffart
1749 61914235 Florent Chuffart
1750 61914235 Florent Chuffart
1751 61914235 Florent Chuffart
1752 61914235 Florent Chuffart
1753 61914235 Florent Chuffart
1754 61914235 Florent Chuffart
1755 61914235 Florent Chuffart
1756 61914235 Florent Chuffart
1757 61914235 Florent Chuffart
1758 61914235 Florent Chuffart
1759 61914235 Florent Chuffart
1760 61914235 Florent Chuffart
1761 61914235 Florent Chuffart
1762 61914235 Florent Chuffart
1763 61914235 Florent Chuffart
1764 61914235 Florent Chuffart
1765 61914235 Florent Chuffart
1766 61914235 Florent Chuffart
1767 61914235 Florent Chuffart
1768 61914235 Florent Chuffart
1769 61914235 Florent Chuffart
1770 61914235 Florent Chuffart
1771 1d833b97 Florent Chuffart
1772 1d833b97 Florent Chuffart
1773 1d833b97 Florent Chuffart
1774 1d833b97 Florent Chuffart
1775 1d833b97 Florent Chuffart
1776 1d833b97 Florent Chuffart
1777 1d833b97 Florent Chuffart
1778 1d833b97 Florent Chuffart
1779 1d833b97 Florent Chuffart
1780 1d833b97 Florent Chuffart
1781 1d833b97 Florent Chuffart
1782 1d833b97 Florent Chuffart
1783 1d833b97 Florent Chuffart
1784 1d833b97 Florent Chuffart
1785 1d833b97 Florent Chuffart
1786 1d833b97 Florent Chuffart
1787 1d833b97 Florent Chuffart
1788 1d833b97 Florent Chuffart
1789 1d833b97 Florent Chuffart
1790 1d833b97 Florent Chuffart
1791 1d833b97 Florent Chuffart
1792 1d833b97 Florent Chuffart
1793 1d833b97 Florent Chuffart
1794 1d833b97 Florent Chuffart
1795 1d833b97 Florent Chuffart
1796 1d833b97 Florent Chuffart
1797 1d833b97 Florent Chuffart
1798 1d833b97 Florent Chuffart
1799 1d833b97 Florent Chuffart
1800 1d833b97 Florent Chuffart
1801 1d833b97 Florent Chuffart
1802 1d833b97 Florent Chuffart
1803 1d833b97 Florent Chuffart
1804 1d833b97 Florent Chuffart
1805 6b5a528c Florent Chuffart
1806 6b5a528c Florent Chuffart
1807 6b5a528c Florent Chuffart
1808 6b5a528c Florent Chuffart
1809 6b5a528c Florent Chuffart
1810 6b5a528c Florent Chuffart
1811 6b5a528c Florent Chuffart
1812 6b5a528c Florent Chuffart
1813 6b5a528c Florent Chuffart
1814 6b5a528c Florent Chuffart
1815 6b5a528c Florent Chuffart
1816 6b5a528c Florent Chuffart
1817 6b5a528c Florent Chuffart
1818 6b5a528c Florent Chuffart
1819 6b5a528c Florent Chuffart
1820 6b5a528c Florent Chuffart
1821 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
1822 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
1823 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
1824 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
1825 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1826 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1827 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1828 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1829 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1830 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1831 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1832 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1833 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
1834 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1835 9fdbfada Florent Chuffart
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1836 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1837 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1838 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1839 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1840 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1841 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1842 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1843 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1844 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1845 6b5a528c Florent Chuffart
){
1846 6b5a528c Florent Chuffart
  returned_list = list()
1847 6b5a528c Florent Chuffart
  # Computing global display parameters
1848 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1849 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1850 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1851 6b5a528c Florent Chuffart
  } else {
1852 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1853 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1854 6b5a528c Florent Chuffart
  }
1855 6b5a528c Florent Chuffart
	base_glo = 0
1856 6b5a528c Florent Chuffart
	nb_rank_glo = 0
1857 6b5a528c Florent Chuffart
  for (samples in replicates) {
1858 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
1859 6b5a528c Florent Chuffart
  }
1860 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1861 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
1862 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
1863 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
1864 6b5a528c Florent Chuffart
  # Plot main frame
1865 935a568c Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab="#reads (per million reads)", xlab=paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr), main="NucleoMiner2" )
1866 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1867 6b5a528c Florent Chuffart
  # Go
1868 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
1869 9fdbfada Florent Chuffart
  wp_maps = list()
1870 9fdbfada Florent Chuffart
  fuzzy_maps = list()
1871 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
1872 6b5a528c Florent Chuffart
		# Computing replicate parameters
1873 6b5a528c Florent Chuffart
		nb_rank = length(samples)
1874 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
1875 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
1876 6b5a528c Florent Chuffart
		y_min = min(ylim)
1877 6b5a528c Florent Chuffart
		y_max = max(ylim)
1878 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
1879 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
1880 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
1881 6b5a528c Florent Chuffart
			# computing sample parameters
1882 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
1883 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1884 6b5a528c Florent Chuffart
			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1885 6b5a528c Florent Chuffart
1886 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1887 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
1888 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
1889 6b5a528c Florent Chuffart
		  } else {
1890 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
1891 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
1892 5bfac5a3 Florent Chuffart
		  }
1893 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
1894 6b5a528c Florent Chuffart
	    # Plot Genome seq
1895 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
1896 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1897 6b5a528c Florent Chuffart
		  }
1898 6b5a528c Florent Chuffart
			# Plot reads
1899 6b5a528c Florent Chuffart
			reads = sample$inputs
1900 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
1901 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
1902 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1903 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
1904 6b5a528c Florent Chuffart
			}
1905 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
1906 6b5a528c Florent Chuffart
        # require(plotrix)
1907 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1908 6b5a528c Florent Chuffart
			}
1909 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
1910 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
1911 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
1912 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
1913 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1914 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
1915 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
1916 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
1917 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1918 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1919 a52f85d7 Florent Chuffart
          }
1920 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
1921 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
1922 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
1923 a52f85d7 Florent Chuffart
          tmp_current_level = 0
1924 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
1925 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
1926 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
1927 a52f85d7 Florent Chuffart
          }
1928 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
1929 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
1930 a52f85d7 Florent Chuffart
        } else {
1931 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
1932 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
1933 6b5a528c Florent Chuffart
        }
1934 0d1475ac Florent Chuffart
        # print(steps_x)
1935 0d1475ac Florent Chuffart
        # print(steps_y)
1936 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1937 6b5a528c Florent Chuffart
        abline(y_lev,0)
1938 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1939 6b5a528c Florent Chuffart
			}
1940 6b5a528c Florent Chuffart
			# Plot nucs
1941 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1942 6b5a528c Florent Chuffart
				nucs = sample$outputs
1943 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
1944 6b5a528c Florent Chuffart
					col = 1
1945 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
1946 7646593d Florent Chuffart
            if (change_col) {
1947 55c1cdff Florent Chuffart
  						col = col + 1
1948 7646593d Florent Chuffart
            }
1949 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
1950 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1951 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1952 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
1953 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
1954 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1955 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
1956 6b5a528c Florent Chuffart
						}
1957 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1958 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1959 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1960 6b5a528c Florent Chuffart
		  			}
1961 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
1962 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
1963 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1964 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1965 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1966 6b5a528c Florent Chuffart
	          }
1967 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
1968 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=2)
1969 6b5a528c Florent Chuffart
	          }
1970 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
1971 6b5a528c Florent Chuffart
				      # require(plotrix)
1972 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1973 6b5a528c Florent Chuffart
						}
1974 6b5a528c Florent Chuffart
		      }
1975 6b5a528c Florent Chuffart
		    } else {
1976 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
1977 6b5a528c Florent Chuffart
		    }
1978 6b5a528c Florent Chuffart
			}
1979 6b5a528c Florent Chuffart
	  }
1980 55c1cdff Florent Chuffart
1981 6b5a528c Florent Chuffart
	  # Plot wp nucs
1982 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1983 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
1984 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
1985 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1986 6b5a528c Florent Chuffart
				} else {
1987 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1988 6b5a528c Florent Chuffart
				}
1989 6b5a528c Florent Chuffart
		  } else {
1990 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
1991 6b5a528c Florent Chuffart
		  }
1992 729c934e Florent Chuffart
      if (plot_chain) {
1993 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
1994 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
1995 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
1996 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
1997 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
1998 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
1999 729c934e Florent Chuffart
            # print(tmp_nuc)
2000 729c934e Florent Chuffart
            return(tmp_nuc)
2001 55c1cdff Florent Chuffart
          })
2002 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
2003 dc58ee6c Florent Chuffart
        })
2004 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2005 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2006 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2007 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
2008 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
2009 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2010 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
2011 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
2012 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
2013 dc58ee6c Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2014 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
2015 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
2016 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2017 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2018 dc58ee6c Florent Chuffart
        tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
2019 55c1cdff Florent Chuffart
        new_tmp_x = c()
2020 55c1cdff Florent Chuffart
        new_tmp_y = c()
2021 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2022 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
2023 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
2024 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2025 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2026 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2027 dc58ee6c Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lw=2)
2028 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2029 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lw=2)
2030 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2031 55c1cdff Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, srt=90, cex=0.9, bg = "yellow")#, col=(tmp_lod_inter < 20) + 2)
2032 729c934e Florent Chuffart
      }
2033 55c1cdff Florent Chuffart
2034 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2035 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2036 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2037 9fdbfada Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
2038 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2039 9fdbfada Florent Chuffart
2040 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2041 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2042 9fdbfada Florent Chuffart
          rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)        
2043 9fdbfada Florent Chuffart
        } 
2044 9fdbfada Florent Chuffart
2045 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2046 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2047 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2048 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2049 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2050 9fdbfada Florent Chuffart
      					all_original_reads = c()
2051 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2052 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2053 9fdbfada Florent Chuffart
      					}
2054 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2055 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2056 9fdbfada Florent Chuffart
    				  }
2057 729c934e Florent Chuffart
  				  }
2058 729c934e Florent Chuffart
  				}
2059 729c934e Florent Chuffart
  			}
2060 729c934e Florent Chuffart
      }
2061 6b5a528c Florent Chuffart
		}
2062 5bfac5a3 Florent Chuffart
	}
2063 55c1cdff Florent Chuffart
2064 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2065 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2066 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2067 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs$roi_index = "foo" 
2068 9fdbfada Florent Chuffart
    }
2069 55c1cdff Florent Chuffart
2070 9fdbfada Florent Chuffart
    #Plot common wp nucs
2071 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2072 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2073 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2074 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2075 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2076 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2077 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2078 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2079 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2080 9fdbfada Florent Chuffart
2081 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2082 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2083 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2084 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2085 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2086 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2087 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2088 9fdbfada Florent Chuffart
        } else {
2089 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2090 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2091 6b5a528c Florent Chuffart
        }
2092 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2093 55c1cdff Florent Chuffart
      }
2094 9fdbfada Florent Chuffart
    }
2095 9fdbfada Florent Chuffart
    for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2096 9fdbfada Florent Chuffart
      inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2097 9fdbfada Florent Chuffart
      tmp_xs = tmp_ys = c()
2098 9fdbfada Florent Chuffart
      for (replicate_rank in 1:length(replicates)) {
2099 9fdbfada Florent Chuffart
        samples = replicates[[replicate_rank]]
2100 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2101 9fdbfada Florent Chuffart
        tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2102 9fdbfada Florent Chuffart
        tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2103 55c1cdff Florent Chuffart
      }
2104 9fdbfada Florent Chuffart
      lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)
2105 9fdbfada Florent Chuffart
    }
2106 9fdbfada Florent Chuffart
    
2107 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2108 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2109 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2110 9fdbfada Florent Chuffart
      roi_index = "foo"
2111 9fdbfada Florent Chuffart
      common_nuc_results = list()
2112 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2113 9fdbfada Florent Chuffart
      unrs = get_unrs(combi, roi, roi_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2114 9fdbfada Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], col=adjustcolor(4, alpha.f = 0.3), border=1)        
2115 9fdbfada Florent Chuffart
    }
2116 9fdbfada Florent Chuffart
2117 6b5a528c Florent Chuffart
	}
2118 6b5a528c Florent Chuffart
  return(returned_list)
2119 6b5a528c Florent Chuffart
}
2120 6b5a528c Florent Chuffart
2121 6b5a528c Florent Chuffart
2122 6b5a528c Florent Chuffart
2123 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2124 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2125 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2126 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2127 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2128 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2129 9fdbfada Florent Chuffart
) {
2130 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2131 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2132 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2133 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2134 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2135 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2136 9fdbfada Florent Chuffart
    }
2137 9fdbfada Florent Chuffart
  }
2138 9fdbfada Florent Chuffart
  return(fuzzy_map)
2139 9fdbfada Florent Chuffart
}
2140 9fdbfada Florent Chuffart
2141 9fdbfada Florent Chuffart
2142 9fdbfada Florent Chuffart
2143 9fdbfada Florent Chuffart
2144 9fdbfada Florent Chuffart
2145 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2146 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2147 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2148 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2149 9fdbfada Florent Chuffart
roi_index, ##<< The region of interest index.
2150 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2151 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2152 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2153 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2154 9fdbfada Florent Chuffart
) {
2155 f0703205 Florent Chuffart
  # print(roi_index)
2156 9fdbfada Florent Chuffart
2157 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2158 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2159 9fdbfada Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$roi_index == roi_index, ]
2160 9fdbfada Florent Chuffart
2161 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2162 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2163 9fdbfada Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$roi_index == roi_index, ]
2164 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2165 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2166 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$roi_index == roi_index, ]
2167 9fdbfada Florent Chuffart
  # Let's go!
2168 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2169 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2170 9fdbfada Florent Chuffart
      return(NULL)
2171 9fdbfada Florent Chuffart
    } else {
2172 9fdbfada Florent Chuffart
      return (index_nuc)
2173 9fdbfada Florent Chuffart
    }
2174 9fdbfada Florent Chuffart
  }))
2175 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2176 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2177 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2178 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2179 9fdbfada Florent Chuffart
  }
2180 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2181 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2182 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2183 9fdbfada Florent Chuffart
2184 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2185 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2186 9fdbfada Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$roi_index == roi_index, ]
2187 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2188 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2189 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$roi_index == roi_index, ]
2190 9fdbfada Florent Chuffart
  # Let's go!
2191 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2192 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2193 9fdbfada Florent Chuffart
      return(NULL)
2194 9fdbfada Florent Chuffart
    } else {
2195 9fdbfada Florent Chuffart
      return (index_nuc)
2196 9fdbfada Florent Chuffart
    }
2197 9fdbfada Florent Chuffart
  }))
2198 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2199 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2200 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2201 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2202 9fdbfada Florent Chuffart
  }
2203 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2204 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2205 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2206 9fdbfada Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, roi_index, roi=roi, config=config)
2207 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2208 9fdbfada Florent Chuffart
2209 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2210 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2211 9fdbfada Florent Chuffart
2212 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2213 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2214 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2215 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$roi_index == roi_index, ]
2216 9fdbfada Florent Chuffart
  # Let's go!
2217 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2218 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2219 9fdbfada Florent Chuffart
      return (index_nuc)
2220 9fdbfada Florent Chuffart
    } else {
2221 9fdbfada Florent Chuffart
      return(NULL)
2222 9fdbfada Florent Chuffart
    }
2223 9fdbfada Florent Chuffart
  }))
2224 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2225 9fdbfada Florent Chuffart
  
2226 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2227 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2228 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2229 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$roi_index == roi_index, ]
2230 9fdbfada Florent Chuffart
  # Let's go!
2231 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2232 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2233 9fdbfada Florent Chuffart
      return (index_nuc)
2234 9fdbfada Florent Chuffart
    } else {
2235 9fdbfada Florent Chuffart
      return(NULL)
2236 9fdbfada Florent Chuffart
    }
2237 9fdbfada Florent Chuffart
  }))
2238 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2239 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2240 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2241 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2242 9fdbfada Florent Chuffart
  } else {
2243 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, roi_index, roi=roi, config=config)      
2244 9fdbfada Florent Chuffart
  }
2245 9fdbfada Florent Chuffart
2246 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2247 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2248 9fdbfada Florent Chuffart
2249 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2250 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2251 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2252 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2253 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2254 9fdbfada Florent Chuffart
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
2255 9fdbfada Florent Chuffart
2256 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2257 9fdbfada Florent Chuffart
  return (non_inter_unr)
2258 9fdbfada Florent Chuffart
}
2259 9fdbfada Florent Chuffart
2260 9fdbfada Florent Chuffart