Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 7646593d

Historique | Voir | Annoter | Télécharger (92,22 ko)

1 6b5a528c Florent Chuffart
# get_content = function(# Get content from cached 
2 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
4 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
5 6b5a528c Florent Chuffart
# ) {
6 6b5a528c Florent Chuffart
#   UseMethod("get_content", obj)
7 6b5a528c Florent Chuffart
#   ### Returns the cached content of the current object. 
8 6b5a528c Florent Chuffart
# }
9 6b5a528c Florent Chuffart
# 
10 6b5a528c Florent Chuffart
# get_content.default = structure( function(# Get content from cached 
11 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
13 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
14 6b5a528c Florent Chuffart
# ) {
15 6b5a528c Florent Chuffart
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16 6b5a528c Florent Chuffart
#     NM_CACHE <<- list()
17 6b5a528c Florent Chuffart
#   }
18 6b5a528c Florent Chuffart
#   if (is.null(NM_CACHE[[obj$filename]])) {
19 6b5a528c Florent Chuffart
#     print(paste("Loading file ",obj$filename, sep=""))
20 6b5a528c Florent Chuffart
#     tmp_content = my_read(obj, ...)
21 6b5a528c Florent Chuffart
#     print("affect it...")
22 6b5a528c Florent Chuffart
#     NM_CACHE[[obj$filename]] <<- tmp_content
23 6b5a528c Florent Chuffart
#     print("done.")
24 6b5a528c Florent Chuffart
#   }  
25 6b5a528c Florent Chuffart
#   return(NM_CACHE[[obj$filename]])
26 6b5a528c Florent Chuffart
# ### Returns the cached content of the current object. 
27 6b5a528c Florent Chuffart
# }, ex=function(){
28 6b5a528c Florent Chuffart
#   # Create a dataframe
29 6b5a528c Florent Chuffart
#   df = NULL
30 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32 6b5a528c Florent Chuffart
#   # Dump it into tmp file
33 6b5a528c Florent Chuffart
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34 6b5a528c Florent Chuffart
#   # Load it into cache using feature.
35 6b5a528c Florent Chuffart
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36 6b5a528c Florent Chuffart
#   class(cached_content_object) = "table"  
37 6b5a528c Florent Chuffart
#   # First time it will be load into cache
38 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
39 6b5a528c Florent Chuffart
#   # Second time not
40 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
41 6b5a528c Florent Chuffart
#   
42 6b5a528c Florent Chuffart
# })
43 6b5a528c Florent Chuffart
#   
44 6b5a528c Florent Chuffart
# my_read = function(
45 6b5a528c Florent Chuffart
# ### Abstract my_read function.  
46 6b5a528c Florent Chuffart
#   obj, ...) {
47 6b5a528c Florent Chuffart
#   UseMethod("my_read", obj)
48 6b5a528c Florent Chuffart
# }
49 6b5a528c Florent Chuffart
#   
50 6b5a528c Florent Chuffart
# my_read.default = function(
51 6b5a528c Florent Chuffart
# ### Default my_read function.  
52 6b5a528c Florent Chuffart
#   obj, ...){
53 6b5a528c Florent Chuffart
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))      
54 6b5a528c Florent Chuffart
# }
55 6b5a528c Florent Chuffart
# 
56 6b5a528c Florent Chuffart
# my_read.fasta = function(
57 6b5a528c Florent Chuffart
# ### my_read function for fasta files.  
58 6b5a528c Florent Chuffart
#   obj, ...){
59 6b5a528c Florent Chuffart
#   # require(seqinr)
60 6b5a528c Florent Chuffart
#   return(read.fasta(obj$filename, ...))
61 6b5a528c Florent Chuffart
# }
62 6b5a528c Florent Chuffart
# 
63 6b5a528c Florent Chuffart
# my_read.table = function(
64 6b5a528c Florent Chuffart
# ### my_read function for table files.  
65 6b5a528c Florent Chuffart
#   obj, ...){
66 6b5a528c Florent Chuffart
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67 6b5a528c Florent Chuffart
#     return(read.table(file=gzfile(obj$filename), ...))      
68 6b5a528c Florent Chuffart
#   } else { 
69 6b5a528c Florent Chuffart
#     return(read.table(file=obj$filename, ...))
70 6b5a528c Florent Chuffart
#   }
71 6b5a528c Florent Chuffart
# }  
72 6b5a528c Florent Chuffart
# 
73 6b5a528c Florent Chuffart
# my_read.cvs = function(
74 6b5a528c Florent Chuffart
# ### my_read function for cvs files.  
75 6b5a528c Florent Chuffart
#   obj, ...){
76 6b5a528c Florent Chuffart
#   return(read.csv(file=obj$filename, ...))
77 6b5a528c Florent Chuffart
# }  
78 6b5a528c Florent Chuffart
79 6b5a528c Florent Chuffart
80 6b5a528c Florent Chuffart
81 6b5a528c Florent Chuffart
82 6b5a528c Florent Chuffart
83 6b5a528c Florent Chuffart
84 6b5a528c Florent Chuffart
85 6b5a528c Florent Chuffart
86 6b5a528c Florent Chuffart
87 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
88 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89 6b5a528c Florent Chuffart
##author<< Gael Yvert, 
90 6b5a528c Florent Chuffart
(
91 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
92 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
93 6b5a528c Florent Chuffart
) {
94 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
95 6b5a528c Florent Chuffart
  N = length(x)
96 6b5a528c Florent Chuffart
  i = 1;
97 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98 6b5a528c Florent Chuffart
  if (i == 1)
99 6b5a528c Florent Chuffart
    return (NA)
100 6b5a528c Florent Chuffart
  else
101 6b5a528c Florent Chuffart
    return( x[i-1] )
102 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
103 6b5a528c Florent Chuffart
}, ex=function(){
104 6b5a528c Florent Chuffart
  print("example")
105 6b5a528c Florent Chuffart
})
106 6b5a528c Florent Chuffart
107 6b5a528c Florent Chuffart
108 6b5a528c Florent Chuffart
109 6b5a528c Florent Chuffart
lod_score_vecs = structure(function # Likelihood ratio
110 6b5a528c Florent Chuffart
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111 6b5a528c Florent Chuffart
(
112 6b5a528c Florent Chuffart
x ,##<< First vector.
113 6b5a528c Florent Chuffart
y ##<< Second vector.
114 6b5a528c Florent Chuffart
) {
115 6b5a528c Florent Chuffart
	if (length(x) <=1 | length(y) <= 1) {
116 6b5a528c Florent Chuffart
		return(NA)
117 6b5a528c Florent Chuffart
	}
118 6b5a528c Florent Chuffart
  meanX = mean(x)
119 6b5a528c Florent Chuffart
  sdX = sd(x)
120 6b5a528c Florent Chuffart
  meanY = mean(y)
121 6b5a528c Florent Chuffart
  sdY = sd(y)
122 6b5a528c Florent Chuffart
  meanXY = mean(c(x,y))
123 6b5a528c Florent Chuffart
  sdXY = sd(c(x,y))
124 6b5a528c Florent Chuffart
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125 6b5a528c Florent Chuffart
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126 6b5a528c Florent Chuffart
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127 729c934e Florent Chuffart
  ratio = llX + llY - llXY
128 6b5a528c Florent Chuffart
  return(ratio)
129 6b5a528c Florent Chuffart
### Returns the likelihood ratio.
130 6b5a528c Florent Chuffart
}, ex=function(){
131 6b5a528c Florent Chuffart
  # LOD score for 2 set of values
132 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
133 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
134 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
135 6b5a528c Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)  
136 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
137 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
138 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142 6b5a528c Florent Chuffart
  lod_score_vecs(x1,x2)
143 6b5a528c Florent Chuffart
 })
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
146 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
148 6b5a528c Florent Chuffart
		l ##<<  A list
149 6b5a528c Florent Chuffart
	) {
150 6b5a528c Florent Chuffart
  if (is.null(df)) {
151 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
152 6b5a528c Florent Chuffart
  } else {
153 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154 6b5a528c Florent Chuffart
  }
155 6b5a528c Florent Chuffart
  return(df)
156 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
157 6b5a528c Florent Chuffart
}, ex=function(){
158 6b5a528c Florent Chuffart
		## Here dataframe is NULL
159 6b5a528c Florent Chuffart
		print(df)
160 6b5a528c Florent Chuffart
		df = NULL
161 6b5a528c Florent Chuffart
		
162 6b5a528c Florent Chuffart
		# Initialize df
163 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164 6b5a528c Florent Chuffart
		print(df)
165 6b5a528c Florent Chuffart
		
166 6b5a528c Florent Chuffart
		# Adding elements to df
167 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168 6b5a528c Florent Chuffart
		print(df)
169 6b5a528c Florent Chuffart
})
170 6b5a528c Florent Chuffart
171 6b5a528c Florent Chuffart
172 6b5a528c Florent Chuffart
sign_from_strand = function(
173 6b5a528c Florent Chuffart
### Get the sign of strand 
174 6b5a528c Florent Chuffart
strands) {
175 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
177 6b5a528c Florent Chuffart
}
178 6b5a528c Florent Chuffart
179 6b5a528c Florent Chuffart
flat_reads = function(
180 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
181 6b5a528c Florent Chuffart
reads, ##<< TemplateFilter input reads 
182 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
183 6b5a528c Florent Chuffart
) {
184 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189 6b5a528c Florent Chuffart
}
190 6b5a528c Florent Chuffart
191 6b5a528c Florent Chuffart
192 6b5a528c Florent Chuffart
193 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
194 6b5a528c Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
196 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
197 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
198 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
199 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
200 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
201 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
202 6b5a528c Florent Chuffart
) {
203 6b5a528c Florent Chuffart
  if (x_min < 0) {
204 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (-x_max - nuc_width/2) & tf_outputs$center <  (-x_min + nuc_width/2),]
205 6b5a528c Florent Chuffart
	} else {
206 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (x_min - nuc_width/2) & tf_outputs$center < (x_max + nuc_width/2),]
207 6b5a528c Florent Chuffart
  }
208 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210 6b5a528c Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]  
211 6b5a528c Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]  
212 6b5a528c Florent Chuffart
  i = 1
213 6b5a528c Florent Chuffart
  while (i <= length(tf_outputs[,1])) {    
214 6b5a528c Florent Chuffart
    lb = tf_outputs[i,]$low 
215 6b5a528c Florent Chuffart
    ub = tf_outputs[i,]$up
216 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217 6b5a528c Florent Chuffart
    i = i+1
218 6b5a528c Florent Chuffart
  }
219 6b5a528c Florent Chuffart
  return(tf_outputs)
220 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
221 6b5a528c Florent Chuffart
}
222 6b5a528c Florent Chuffart
223 6b5a528c Florent Chuffart
224 6b5a528c Florent Chuffart
225 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
226 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
228 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
229 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
230 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
231 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
232 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
233 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
234 b8a95426 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
235 6b5a528c Florent Chuffart
) {
236 6b5a528c Florent Chuffart
	if (only_f) {
237 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
238 6b5a528c Florent Chuffart
	} else if (only_r) {
239 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
240 6b5a528c Florent Chuffart
	} else {
241 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
242 6b5a528c Florent Chuffart
	}
243 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
244 b8a95426 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
245 b8a95426 Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]    
246 b8a95426 Florent Chuffart
  }
247 6b5a528c Florent Chuffart
	return(inputs)
248 6b5a528c Florent Chuffart
### Returns filtred inputs.
249 6b5a528c Florent Chuffart
}
250 6b5a528c Florent Chuffart
251 6b5a528c Florent Chuffart
252 6b5a528c Florent Chuffart
253 6b5a528c Florent Chuffart
get_comp_strand = function(
254 6b5a528c Florent Chuffart
### Compute the complementatry strand.
255 6b5a528c Florent Chuffart
strand ##<< The original strand.
256 6b5a528c Florent Chuffart
) {
257 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
258 6b5a528c Florent Chuffart
	  if (n=="a") {return("t")} 
259 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
260 6b5a528c Florent Chuffart
		if (n=="c") {return("g")} 
261 6b5a528c Florent Chuffart
		if (n=="g") {return("c")} 
262 6b5a528c Florent Chuffart
	})
263 6b5a528c Florent Chuffart
### Returns the complementatry strand.
264 6b5a528c Florent Chuffart
}
265 6b5a528c Florent Chuffart
266 6b5a528c Florent Chuffart
267 6b5a528c Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. 
268 6b5a528c Florent Chuffart
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270 729c934e Florent Chuffart
lod_thres=20, ##<< Log likelihood ration threshold.
271 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272 6b5a528c Florent Chuffart
){
273 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
274 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
275 6b5a528c Florent Chuffart
			return(TRUE)
276 6b5a528c Florent Chuffart
		}
277 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
278 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
279 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
280 6b5a528c Florent Chuffart
	        return(FALSE)
281 6b5a528c Florent Chuffart
	      }
282 6b5a528c Florent Chuffart
	  	}
283 6b5a528c Florent Chuffart
	  }
284 6b5a528c Florent Chuffart
	  return(TRUE)
285 6b5a528c Florent Chuffart
	}
286 6b5a528c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
287 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
288 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
289 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
290 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
291 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
292 6b5a528c Florent Chuffart
				# print(new_cluster)
293 6b5a528c Florent Chuffart
		  }
294 6b5a528c Florent Chuffart
		} else {
295 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
296 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
297 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
298 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
299 6b5a528c Florent Chuffart
			}
300 6b5a528c Florent Chuffart
		}
301 6b5a528c Florent Chuffart
		return(clusters)
302 6b5a528c Florent Chuffart
	}
303 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
304 6b5a528c Florent Chuffart
	lod_scores = c()
305 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
306 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
307 6b5a528c Florent Chuffart
  # compute clusters
308 6b5a528c Florent Chuffart
  clusters = list()
309 6b5a528c Florent Chuffart
  cluster_contents = list()
310 6b5a528c Florent Chuffart
  # Init reader
311 6b5a528c Florent Chuffart
  indexes = c()
312 6b5a528c Florent Chuffart
  track_readers = c()
313 6b5a528c Florent Chuffart
  current_nuc = NULL
314 729c934e Florent Chuffart
	lod_score = lod_thres + 1 
315 6b5a528c Florent Chuffart
  # Read nucs from TF outputs  
316 6b5a528c Florent Chuffart
  tf_outs = list()
317 6b5a528c Florent Chuffart
	i = 1
318 6b5a528c Florent Chuffart
  for (sample in samples) {
319 6b5a528c Florent Chuffart
		# print(sample$roi$chr)
320 6b5a528c Florent Chuffart
		# print(min_nuc_center)
321 6b5a528c Florent Chuffart
		# print(max_nuc_center)
322 6b5a528c Florent Chuffart
		# print(sample$outputs)
323 6b5a528c Florent Chuffart
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
324 6b5a528c Florent Chuffart
		# print(tf_outs[[i]])
325 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
326 6b5a528c Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]  
327 6b5a528c Florent Chuffart
    indexes[i] = 1
328 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
329 6b5a528c Florent Chuffart
      track_readers[i] = coord_max				
330 6b5a528c Florent Chuffart
	  } else {
331 6b5a528c Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center				
332 6b5a528c Florent Chuffart
		}
333 6b5a528c Florent Chuffart
		i = i+1		
334 6b5a528c Florent Chuffart
  }
335 6b5a528c Florent Chuffart
	# print(track_readers)
336 6b5a528c Florent Chuffart
  new_cluster = NULL
337 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
338 6b5a528c Florent Chuffart
  nuc_from_track = c()
339 6b5a528c Florent Chuffart
  for (i in 1:length(tf_outs)){
340 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
341 6b5a528c Florent Chuffart
  }
342 6b5a528c Florent Chuffart
  # Start clustering
343 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
344 6b5a528c Florent Chuffart
    new_center = min(track_readers)
345 6b5a528c Florent Chuffart
		current_track = which(track_readers == new_center)[1]			
346 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
347 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
348 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
349 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
350 6b5a528c Florent Chuffart
		new_nuc$track = current_track
351 6b5a528c Florent Chuffart
		
352 6b5a528c Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width) 
353 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
354 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
355 6b5a528c Florent Chuffart
356 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
357 6b5a528c Florent Chuffart
358 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
359 6b5a528c Florent Chuffart
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
360 6b5a528c Florent Chuffart
			lod_scores = c(lod_scores,lod_score)
361 6b5a528c Florent Chuffart
		}
362 6b5a528c Florent Chuffart
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363 6b5a528c Florent Chuffart
		if (is.na(lod_score)) {
364 729c934e Florent Chuffart
			lod_score = lod_thres + 1 
365 6b5a528c Florent Chuffart
		}
366 729c934e Florent Chuffart
		# Store lod_score
367 729c934e Florent Chuffart
		new_nuc$lod_score = lod_score 			
368 729c934e Florent Chuffart
	  if (lod_score < lod_thres) {
369 6b5a528c Florent Chuffart
      # aggregate to current cluster
370 6b5a528c Florent Chuffart
      #   update bound
371 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
372 6b5a528c Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound      
373 6b5a528c Florent Chuffart
      }
374 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
375 6b5a528c Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound         
376 6b5a528c Florent Chuffart
      }
377 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
378 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
379 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
380 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
381 6b5a528c Florent Chuffart
    } else {
382 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
383 6b5a528c Florent Chuffart
        # store old cluster
384 6b5a528c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
385 6b5a528c Florent Chuffart
			}
386 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
387 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
388 6b5a528c Florent Chuffart
      nuc_from_track = c()
389 6b5a528c Florent Chuffart
      for (i in 1:length(tf_outs)){
390 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
391 6b5a528c Florent Chuffart
      }
392 6b5a528c Florent Chuffart
      # create new cluster        
393 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
394 6b5a528c Florent Chuffart
      # update upper bound
395 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
396 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
397 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
398 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
399 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
400 6b5a528c Florent Chuffart
				
401 6b5a528c Florent Chuffart
		}
402 6b5a528c Florent Chuffart
			
403 6b5a528c Florent Chuffart
		current_nuc = new_nuc
404 6b5a528c Florent Chuffart
405 6b5a528c Florent Chuffart
    # update indexes
406 6b5a528c Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {      
407 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
408 6b5a528c Florent Chuffart
      # update track
409 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
410 6b5a528c Florent Chuffart
    } else {
411 6b5a528c Florent Chuffart
      # update track
412 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
413 6b5a528c Florent Chuffart
    }
414 6b5a528c Florent Chuffart
  }
415 6b5a528c Florent Chuffart
  # store last cluster
416 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
417 6b5a528c Florent Chuffart
    # store old cluster
418 6b5a528c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
419 6b5a528c Florent Chuffart
  }
420 6b5a528c Florent Chuffart
	return(list(clusters, lod_scores))
421 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
422 6b5a528c Florent Chuffart
}, ex=function(){	
423 6b5a528c Florent Chuffart
	# Dealing with a region of interest
424 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
425 6b5a528c Florent Chuffart
	samples = list()
426 6b5a528c Florent Chuffart
	for (i in 1:3) {
427 6b5a528c Florent Chuffart
		# Create TF output
428 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
429 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
430 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
431 6b5a528c Florent Chuffart
		# Generate corresponding reads
432 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
433 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
434 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
435 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
436 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
437 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
438 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
439 6b5a528c Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
440 6b5a528c Florent Chuffart
		                         "V2" = u_reads, 
441 6b5a528c Florent Chuffart
														 "V3" = strands, 
442 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
443 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
444 6b5a528c Florent Chuffart
	}
445 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
446 6b5a528c Florent Chuffart
})
447 6b5a528c Florent Chuffart
448 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
449 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
450 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
451 7646593d Florent Chuffart
roi_index ##<< the index of the roi involved
452 7646593d Florent Chuffart
) {
453 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
454 7646593d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", roi_index, "ands strain", strain, "." ))
455 7646593d Florent Chuffart
    tmp_strain_maps = list()  
456 7646593d Florent Chuffart
	} else {
457 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
458 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
459 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
460 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
461 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
462 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
463 7646593d Florent Chuffart
			tmp_nuc_as_list[["roi_index"]] = roi_index
464 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
465 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
466 7646593d Florent Chuffart
			all_original_reads = c()
467 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
468 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
469 7646593d Florent Chuffart
			}
470 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
471 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
472 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
473 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
474 7646593d Florent Chuffart
			} else {
475 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_1"]] = NA
476 7646593d Florent Chuffart
				tmp_nuc_as_list[["lod_2"]] = NA							
477 7646593d Florent Chuffart
			}
478 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
479 7646593d Florent Chuffart
    })
480 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
481 7646593d Florent Chuffart
	}
482 7646593d Florent Chuffart
  return(data.frame(tmp_strain_maps))
483 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
484 7646593d Florent Chuffart
}
485 6b5a528c Florent Chuffart
486 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
487 6b5a528c Florent Chuffart
### This function aligns nucs between two strains for a given genome region.
488 6b5a528c Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain. 
489 6b5a528c Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
490 6b5a528c Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
491 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
492 729c934e Florent Chuffart
lod_thres=100, ##<< LOD cut off.
493 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
494 6b5a528c Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.	
495 6b5a528c Florent Chuffart
) {
496 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
497 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
498 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
499 6b5a528c Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")			
500 6b5a528c Florent Chuffart
	}
501 6b5a528c Florent Chuffart
	common_nuc = NULL
502 6b5a528c Florent Chuffart
	lod_scores = c()
503 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
504 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
505 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
506 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
507 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
508 6b5a528c Florent Chuffart
	big_roi = replicates[[1]][[1]]$roi
509 a0b91fee Florent Chuffart
  orig_big_roi = replicates[[1]][[1]]$orig_roi
510 6b5a528c Florent Chuffart
	if(big_roi$end - big_roi$begin < 0) {
511 6b5a528c Florent Chuffart
		tmp_begin = big_roi$begin
512 6b5a528c Florent Chuffart
		big_roi$begin =  big_roi$end
513 6b5a528c Florent Chuffart
		big_roi$end =  tmp_begin
514 6b5a528c Florent Chuffart
	}
515 6b5a528c Florent Chuffart
	# GO!
516 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
517 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
518 6b5a528c Florent Chuffart
	}
519 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
520 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
521 6b5a528c Florent Chuffart
  }
522 1d833b97 Florent Chuffart
  # foo <<- wp_nucs_strain_ref1
523 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
524 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
525 6b5a528c Florent Chuffart
	# dealing with matching_nas
526 6b5a528c Florent Chuffart
	lws = c()
527 6b5a528c Florent Chuffart
	ups = c()
528 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
529 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
530 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
531 6b5a528c Florent Chuffart
	}
532 6b5a528c Florent Chuffart
533 6b5a528c Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))			
534 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
535 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
536 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
537 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
538 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
539 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
540 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
541 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
542 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
543 a0b91fee Florent Chuffart
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi=orig_big_roi, config=config) 
544 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
545 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
546 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
547 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
548 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
549 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
550 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
551 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
552 883439d6 Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)             
553 a0b91fee Florent Chuffart
						if (!is.null(translate_roi(nuc_strain_ref2_to_roi, strain_ref1, big_roi=orig_big_roi, config=config)) &  
554 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
555 6b5a528c Florent Chuffart
							# Filtering on correlation Score and collecting reads					
556 6b5a528c Florent Chuffart
							SKIP = FALSE
557 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
558 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
559 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
560 6b5a528c Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)							
561 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
562 6b5a528c Florent Chuffart
									SKIP = TRUE
563 6b5a528c Florent Chuffart
								}
564 6b5a528c Florent Chuffart
							}
565 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
566 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
567 6b5a528c Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)							
568 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
569 6b5a528c Florent Chuffart
									SKIP = TRUE
570 6b5a528c Florent Chuffart
								}
571 6b5a528c Florent Chuffart
							}
572 6b5a528c Florent Chuffart
							# Filtering on correlation Score						
573 6b5a528c Florent Chuffart
							if (!SKIP) {
574 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
575 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
576 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
577 6b5a528c Florent Chuffart
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
578 6b5a528c Florent Chuffart
								lod_scores = c(lod_scores, lod_score)
579 6b5a528c Florent Chuffart
								# Filtering on LOD Score						
580 729c934e Florent Chuffart
								if (lod_score < lod_thres) {
581 6b5a528c Florent Chuffart
									tmp_nuc = list()
582 6b5a528c Florent Chuffart
									# strain_ref1
583 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
584 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
585 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
586 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
587 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
588 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
589 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
590 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
591 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
592 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
593 6b5a528c Florent Chuffart
									# strain_ref2
594 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
595 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
596 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
597 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
598 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
599 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
600 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
601 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
602 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
603 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
604 6b5a528c Florent Chuffart
									# common
605 6b5a528c Florent Chuffart
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
606 6b5a528c Florent Chuffart
									# print(tmp_nuc)
607 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
608 6b5a528c Florent Chuffart
								}
609 6b5a528c Florent Chuffart
							}
610 6b5a528c Florent Chuffart
						}
611 6b5a528c Florent Chuffart
					}				
612 6b5a528c Florent Chuffart
				} else {
613 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
614 6b5a528c Florent Chuffart
			  }
615 6b5a528c Florent Chuffart
		  }
616 6b5a528c Florent Chuffart
		}
617 6b5a528c Florent Chuffart
		
618 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
619 6b5a528c Florent Chuffart
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
620 6b5a528c Florent Chuffart
			to_remove_list = c()			
621 6b5a528c Florent Chuffart
			for (i in 1:length(index_redundant)) {	
622 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
623 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
624 6b5a528c Florent Chuffart
				}	 else {
625 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
626 6b5a528c Florent Chuffart
			  } 		
627 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
628 6b5a528c Florent Chuffart
			}
629 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
630 6b5a528c Florent Chuffart
		}
631 6b5a528c Florent Chuffart
		
632 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
633 8c87ee64 Florent Chuffart
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
634 6b5a528c Florent Chuffart
			to_remove_list = c()			
635 6b5a528c Florent Chuffart
			for (i in 1:length(index_redundant)) {	
636 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
637 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
638 6b5a528c Florent Chuffart
				}	 else {
639 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
640 6b5a528c Florent Chuffart
			  } 		
641 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
642 6b5a528c Florent Chuffart
			}
643 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
644 6b5a528c Florent Chuffart
		}
645 6b5a528c Florent Chuffart
				
646 6b5a528c Florent Chuffart
		return(list(common_nuc, lod_scores))
647 6b5a528c Florent Chuffart
	} else {
648 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
649 6b5a528c Florent Chuffart
		return(NULL)
650 6b5a528c Florent Chuffart
	}
651 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
652 6b5a528c Florent Chuffart
}, ex=function(){	
653 1d833b97 Florent Chuffart
654 1d833b97 Florent Chuffart
    # Define new translate_roi function...
655 1d833b97 Florent Chuffart
    translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
656 1d833b97 Florent Chuffart
      return(roi)
657 1d833b97 Florent Chuffart
    }
658 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
659 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", as.environment("package:nucleominer"))
660 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", getNamespace("nucleominer"))
661 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, "package:nucleominer")
662 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, getNamespace("nucleominer"))
663 1d833b97 Florent Chuffart
    lockBinding("translate_roi", getNamespace("nucleominer"))
664 1d833b97 Florent Chuffart
    lockBinding("translate_roi", as.environment("package:nucleominer"))  
665 1d833b97 Florent Chuffart
666 6b5a528c Florent Chuffart
	# Dealing with a region of interest
667 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
668 6b5a528c Florent Chuffart
	roi2 = translate_roi(roi, roi$strain_ref1)
669 6b5a528c Florent Chuffart
	replicates = list()
670 6b5a528c Florent Chuffart
	for (j in 1:2) {
671 6b5a528c Florent Chuffart
		samples = list()
672 6b5a528c Florent Chuffart
		for (i in 1:3) {
673 6b5a528c Florent Chuffart
			# Create TF output
674 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
675 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
676 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
677 6b5a528c Florent Chuffart
			# Generate corresponding reads
678 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
679 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
680 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
681 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
682 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
683 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
684 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
685 6b5a528c Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
686 6b5a528c Florent Chuffart
			                         "V2" = u_reads, 
687 6b5a528c Florent Chuffart
															 "V3" = strands, 
688 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
689 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
690 6b5a528c Florent Chuffart
		}
691 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
692 6b5a528c Florent Chuffart
	}
693 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
694 6b5a528c Florent Chuffart
})
695 6b5a528c Florent Chuffart
696 6b5a528c Florent Chuffart
697 6b5a528c Florent Chuffart
698 6b5a528c Florent Chuffart
699 6b5a528c Florent Chuffart
700 6b5a528c Florent Chuffart
701 6b5a528c Florent Chuffart
702 6b5a528c Florent Chuffart
703 6b5a528c Florent Chuffart
704 6b5a528c Florent Chuffart
705 6b5a528c Florent Chuffart
706 6b5a528c Florent Chuffart
707 6b5a528c Florent Chuffart
708 6b5a528c Florent Chuffart
709 6b5a528c Florent Chuffart
710 6b5a528c Florent Chuffart
711 6b5a528c Florent Chuffart
712 6b5a528c Florent Chuffart
713 6b5a528c Florent Chuffart
714 6b5a528c Florent Chuffart
715 6b5a528c Florent Chuffart
716 6b5a528c Florent Chuffart
717 6b5a528c Florent Chuffart
718 6b5a528c Florent Chuffart
719 6b5a528c Florent Chuffart
720 6b5a528c Florent Chuffart
721 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
722 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
723 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
724 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
725 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
726 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
727 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
728 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
729 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
730 6b5a528c Florent Chuffart
) {
731 6b5a528c Florent Chuffart
	samples=list()
732 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
733 6b5a528c Florent Chuffart
	for (i in samples_ids) {
734 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
735 a0b91fee Florent Chuffart
    sample$orig_roi = roi
736 8e9facd8 Florent Chuffart
    sample$roi = translate_roi(roi, sample$strain, config = config)
737 6b5a528c Florent Chuffart
		if (get_genome) {
738 6b5a528c Florent Chuffart
			# Get Genome
739 1d833b97 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
740 6b5a528c Florent Chuffart
		}
741 6b5a528c Florent Chuffart
		# Get inputs
742 a52f85d7 Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) 
743 6b5a528c Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4]) 	
744 6b5a528c Florent Chuffart
		if (!only_fetch) {
745 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
746 6b5a528c Florent Chuffart
	  }
747 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
748 6b5a528c Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
749 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
750 6b5a528c Florent Chuffart
			if (!only_fetch) {
751 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
752 6b5a528c Florent Chuffart
  		}
753 6b5a528c Florent Chuffart
		}
754 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = sample		
755 6b5a528c Florent Chuffart
	}
756 6b5a528c Florent Chuffart
  return(samples)
757 6b5a528c Florent Chuffart
}
758 6b5a528c Florent Chuffart
759 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
760 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
761 6b5a528c Florent Chuffart
region1, ##<< Original regions. 
762 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
763 6b5a528c Florent Chuffart
) {
764 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
765 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
766 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
767 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
768 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
769 6b5a528c Florent Chuffart
      ret = c()
770 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
771 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
772 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
773 6b5a528c Florent Chuffart
          # remove cur_fuzzy
774 6b5a528c Florent Chuffart
          ret = c()
775 6b5a528c Florent Chuffart
          break
776 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
777 6b5a528c Florent Chuffart
          # crop fuzzy 
778 6b5a528c Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1 
779 6b5a528c Florent Chuffart
          ret = cur_fuzzy
780 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
781 6b5a528c Florent Chuffart
          # crop fuzzy 
782 6b5a528c Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1 
783 6b5a528c Florent Chuffart
          ret = cur_fuzzy
784 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
785 6b5a528c Florent Chuffart
          # split fuzzy
786 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
787 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1 
788 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
789 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1 
790 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
791 6b5a528c Florent Chuffart
          # print(ret)
792 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
793 6b5a528c Florent Chuffart
          break
794 6b5a528c Florent Chuffart
        } else {
795 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
796 6b5a528c Florent Chuffart
        }          
797 6b5a528c Florent Chuffart
      }
798 6b5a528c Florent Chuffart
      return(ret)
799 6b5a528c Florent Chuffart
    } else {
800 6b5a528c Florent Chuffart
      return(cur_fuzzy)
801 6b5a528c Florent Chuffart
    }
802 6b5a528c Florent Chuffart
  })
803 6b5a528c Florent Chuffart
  }
804 6b5a528c Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1, region2)
805 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
806 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
807 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
808 b20637ed Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE) 
809 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
810 b20637ed Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)     
811 b20637ed Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)     
812 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
813 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
814 b20637ed Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]      
815 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
816 6b5a528c Florent Chuffart
}
817 6b5a528c Florent Chuffart
818 6b5a528c Florent Chuffart
union_regions = function(# Aggregate regions that intersect themnselves.
819 6b5a528c Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.  
820 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
821 6b5a528c Florent Chuffart
) {
822 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
823 6b5a528c Florent Chuffart
  new_length = 0
824 6b5a528c Florent Chuffart
    
825 6b5a528c Florent Chuffart
  while (old_length != new_length) {
826 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
827 6b5a528c Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 0, TRUE)      
828 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
829 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
830 6b5a528c Florent Chuffart
      vec_end_1 = c(length(regions$stop))      
831 6b5a528c Florent Chuffart
    } 
832 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
833 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
834 6b5a528c Florent Chuffart
    }
835 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
836 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
837 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
838 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
839 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
840 6b5a528c Florent Chuffart
      roi_index = regions$roi_index[vec_beg_1[i]] 
841 6b5a528c Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, roi_index=roi_index))
842 6b5a528c Florent Chuffart
      })
843 59ad95ca Florent Chuffart
    union = collapse_regions(union)
844 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
845 6b5a528c Florent Chuffart
    new_length = length(union[,1])
846 6b5a528c Florent Chuffart
    regions = union
847 6b5a528c Florent Chuffart
  }
848 6b5a528c Florent Chuffart
  return(union)
849 6b5a528c Florent Chuffart
}
850 6b5a528c Florent Chuffart
851 6b5a528c Florent Chuffart
remove_aligned_wp = function(# Remove wp nucs from common nucs list.
852 6b5a528c Florent Chuffart
### It is based on common wp nucs index on nucs and region.  
853 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
854 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
855 6b5a528c Florent Chuffart
tmp_common_nucs, ##<< the list of wp nucs.
856 6b5a528c Florent Chuffart
strain##<< The strain to consider.
857 6b5a528c Florent Chuffart
){
858 6b5a528c Florent Chuffart
  fuzzy_nucs = strain_maps[[strain]]
859 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$roi_index == roi_index,]
860 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
861 6b5a528c Florent Chuffart
  if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
862 6b5a528c Florent Chuffart
  if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
863 6b5a528c Florent Chuffart
  anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
864 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
865 6b5a528c Florent Chuffart
  return(fuzzy_nucs)
866 6b5a528c Florent Chuffart
}
867 6b5a528c Florent Chuffart
868 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
869 6b5a528c Florent Chuffart
### This function is an eloborated call to translate_roi.
870 6b5a528c Florent Chuffart
regions, ##<< Regions to be translated. 
871 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
872 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
873 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
874 6b5a528c Florent Chuffart
roi ##<< The region of interest.
875 6b5a528c Florent Chuffart
) {
876 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
877 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
878 5a26597a Florent Chuffart
    big_roi =  roi
879 5a26597a Florent Chuffart
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config = config, big_roi = big_roi) 
880 6b5a528c Florent Chuffart
    data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
881 6b5a528c Florent Chuffart
    })
882 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
883 59ad95ca Florent Chuffart
}
884 59ad95ca Florent Chuffart
885 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
886 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
887 59ad95ca Florent Chuffart
regions ##< a list of regions
888 59ad95ca Florent Chuffart
) {
889 b20637ed Florent Chuffart
  regions = do.call(rbind, regions)      
890 59ad95ca Florent Chuffart
  regions$chr = as.character(regions$chr)     
891 59ad95ca Florent Chuffart
  regions$chr = as.numeric(regions$chr)     
892 59ad95ca Florent Chuffart
  regions$lower_bound = as.numeric(regions$lower_bound)
893 59ad95ca Florent Chuffart
  regions$upper_bound = as.numeric(regions$upper_bound)
894 59ad95ca Florent Chuffart
  regions = regions[order(regions$lower_bound),]      
895 59ad95ca Florent Chuffart
  return(regions)  
896 6b5a528c Florent Chuffart
}
897 6b5a528c Florent Chuffart
898 6b5a528c Florent Chuffart
extract_wp = function(# Extract wp nucs from nuc map. 
899 6b5a528c Florent Chuffart
### Function based on common wp nuc index and roi_index.
900 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
901 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
902 6b5a528c Florent Chuffart
strain, ##<< The strain to consider.
903 6b5a528c Florent Chuffart
tmp_common_nucs ##<< the list of wp nucs.
904 6b5a528c Florent Chuffart
) {
905 6b5a528c Florent Chuffart
  wp_nucs = apply(t(tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]), 2, function(i) {
906 6b5a528c Florent Chuffart
    tmp_wp_nucs = strain_maps[[strain]]
907 6b5a528c Florent Chuffart
    tmp_wp_nucs = tmp_wp_nucs[tmp_wp_nucs$roi_index == roi_index & tmp_wp_nucs$index_nuc == i,]
908 6b5a528c Florent Chuffart
    return(tmp_wp_nucs)
909 6b5a528c Florent Chuffart
    })
910 59ad95ca Florent Chuffart
  return(collapse_regions(wp_nucs))
911 6b5a528c Florent Chuffart
}
912 6b5a528c Florent Chuffart
913 6b5a528c Florent Chuffart
914 6b5a528c Florent Chuffart
915 6b5a528c Florent Chuffart
916 6b5a528c Florent Chuffart
917 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
918 6b5a528c Florent Chuffart
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
919 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
920 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
921 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
922 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
923 6b5a528c Florent Chuffart
) {
924 8e9facd8 Florent Chuffart
  tr_roi = translate_roi(roi, strain, config = config)
925 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
926 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
927 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
928 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
929 6b5a528c Florent Chuffart
  }
930 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
931 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
932 6b5a528c Florent Chuffart
  }
933 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
934 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
935 6b5a528c Florent Chuffart
  }
936 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
937 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
938 6b5a528c Florent Chuffart
  }
939 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
940 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
941 6b5a528c Florent Chuffart
}
942 6b5a528c Florent Chuffart
943 6b5a528c Florent Chuffart
944 6b5a528c Florent Chuffart
get_fuzzy = function(# Compute the fuzzy nucs.
945 6b5a528c Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting fuzzy regions. It will be take into account in the count read part og the pipeline.
946 6b5a528c Florent Chuffart
combi, ##<< The strain combination to consider.
947 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
948 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
949 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
950 1d833b97 Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
951 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
952 6b5a528c Florent Chuffart
) {
953 6b5a528c Florent Chuffart
  print(roi_index)
954 6b5a528c Florent Chuffart
  PLOT = FALSE
955 6b5a528c Florent Chuffart
  tmp_common_nucs = common_nuc_results[[paste(combi[1], combi[2], sep="_")]]
956 6b5a528c Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$roi_index == roi_index, ]
957 6b5a528c Florent Chuffart
958 6b5a528c Florent Chuffart
  print(paste("Dealing with fuzzy from", combi[1]))
959 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
960 1d833b97 Florent Chuffart
  tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
961 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
962 6b5a528c Florent Chuffart
  agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)    
963 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
964 6b5a528c Florent Chuffart
    lines(c(agg_fuzzy_1[i,]$lower_bound, agg_fuzzy_1[i,]$upper_bound), c(+3.1,+3.1), col=2)
965 6b5a528c Florent Chuffart
  }
966 6b5a528c Florent Chuffart
967 6b5a528c Florent Chuffart
  print(paste("Dealing with fuzzy from ", combi[2]))
968 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
969 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
970 6b5a528c Florent Chuffart
  agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)      
971 1d833b97 Florent Chuffart
  agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
972 ddd6e221 Florent Chuffart
  tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi=roi, config=config)
973 1d833b97 Florent Chuffart
  tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
974 6b5a528c Florent Chuffart
  # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
975 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
976 6b5a528c Florent Chuffart
    lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
977 6b5a528c Florent Chuffart
  }
978 6b5a528c Florent Chuffart
  
979 6b5a528c Florent Chuffart
  print("Dealing with fuzzy from both...")
980 6b5a528c Florent Chuffart
  all_fuzzy = union_regions(rbind(agg_fuzzy_1, tr_agg_fuzzy_2))
981 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(all_fuzzy[,1])) {
982 6b5a528c Florent Chuffart
    lines(c(all_fuzzy[i,]$lower_bound, all_fuzzy[i,]$upper_bound), c(+3.2, +3.2), col=1)
983 6b5a528c Florent Chuffart
  }
984 6b5a528c Florent Chuffart
  
985 6b5a528c Florent Chuffart
  print(paste("Dealing with wp from", combi[1]))
986 6b5a528c Florent Chuffart
  wp_nucs_1 = extract_wp(strain_maps, roi_index, combi[1], tmp_common_nucs)
987 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(wp_nucs_1[,1])) {
988 6b5a528c Florent Chuffart
    lines(c(wp_nucs_1[i,]$lower_bound, wp_nucs_1[i,]$upper_bound), c(+3.5,+3.5), col=3)
989 6b5a528c Florent Chuffart
  }
990 6b5a528c Florent Chuffart
991 6b5a528c Florent Chuffart
  print(paste("Dealing with wp from", combi[2]))
992 6b5a528c Florent Chuffart
  wp_nucs_2 = extract_wp(strain_maps, roi_index, combi[2], tmp_common_nucs)
993 ddd6e221 Florent Chuffart
  tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, roi_index, roi=roi, config=config)
994 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(tr_wp_nucs_2[,1])) {
995 6b5a528c Florent Chuffart
    lines(c(tr_wp_nucs_2[i,]$lower_bound, tr_wp_nucs_2[i,]$upper_bound), c(+3.7,+3.7), col=3)
996 6b5a528c Florent Chuffart
  }
997 6b5a528c Florent Chuffart
998 6b5a528c Florent Chuffart
  print("Dealing with wp from both...")
999 6b5a528c Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
1000 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(all_wp[,1])) {
1001 6b5a528c Florent Chuffart
    lines(c(all_wp[i,]$lower_bound, all_wp[i,]$upper_bound), c(+3.6, +3.6), col=1)
1002 6b5a528c Florent Chuffart
  }  
1003 6b5a528c Florent Chuffart
1004 6b5a528c Florent Chuffart
  print("Dealing with fuzzy and wp...")
1005 6b5a528c Florent Chuffart
  non_inter_fuzzy = substract_region(all_fuzzy, all_wp)
1006 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) { return(NULL) }
1007 6b5a528c Florent Chuffart
  
1008 6b5a528c Florent Chuffart
  non_inter_fuzzy$len = non_inter_fuzzy$upper_bound - non_inter_fuzzy$lower_bound
1009 6b5a528c Florent Chuffart
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
1010 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(non_inter_fuzzy[,1])) {
1011 6b5a528c Florent Chuffart
    lines(c(non_inter_fuzzy[i,]$lower_bound, non_inter_fuzzy[i,]$upper_bound), c(+3.9, +3.9), col=1)
1012 6b5a528c Florent Chuffart
  }
1013 6b5a528c Florent Chuffart
  
1014 6b5a528c Florent Chuffart
  non_inter_fuzzy$index_nuc = 1:length(non_inter_fuzzy[,1])
1015 6b5a528c Florent Chuffart
  return (non_inter_fuzzy)
1016 6b5a528c Florent Chuffart
}
1017 6b5a528c Florent Chuffart
1018 6b5a528c Florent Chuffart
1019 6b5a528c Florent Chuffart
1020 6b5a528c Florent Chuffart
get_all_reads = function(# Retrieve Reads 
1021 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
1022 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1023 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
1024 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
1025 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1026 6b5a528c Florent Chuffart
) {
1027 6b5a528c Florent Chuffart
	all_reads = NULL
1028 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
1029 6b5a528c Florent Chuffart
    if (form == "fuzzy") {
1030 cc54c799 Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
1031 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1032 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
1033 6b5a528c Florent Chuffart
      tmp_res$form = form
1034 6b5a528c Florent Chuffart
    } else if (form == "wp") {
1035 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1036 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1037 6b5a528c Florent Chuffart
      tmp_res$form = form
1038 6b5a528c Florent Chuffart
    } else if (form == "wpfuzzy") {
1039 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1040 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1041 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
1042 cc54c799 Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
1043 6b5a528c Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)				
1044 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
1045 6b5a528c Florent Chuffart
      tmp_res2$form = "fuzzy"
1046 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
1047 6b5a528c Florent Chuffart
    }
1048 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
1049 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
1050 6b5a528c Florent Chuffart
		}     
1051 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
1052 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
1053 6b5a528c Florent Chuffart
  }
1054 6b5a528c Florent Chuffart
  return(all_reads)	
1055 6b5a528c Florent Chuffart
}
1056 6b5a528c Florent Chuffart
1057 6b5a528c Florent Chuffart
get_design = function(# Build the design for deseq
1058 6b5a528c Florent Chuffart
### This function build the design according sample properties.
1059 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1060 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
1061 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
1062 6b5a528c Florent Chuffart
) {
1063 6b5a528c Florent Chuffart
  off1 = 0
1064 6b5a528c Florent Chuffart
  off2 = 0
1065 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
1066 6b5a528c Florent Chuffart
	design_rownames = c()
1067 6b5a528c Florent Chuffart
	design_manip = c()
1068 6b5a528c Florent Chuffart
	design_strain = c()
1069 6b5a528c Florent Chuffart
  off2index = function(off) {	
1070 6b5a528c Florent Chuffart
  	switch(toString(off), 
1071 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
1072 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
1073 6b5a528c Florent Chuffart
    	"3"=c(1,1,0), 
1074 6b5a528c Florent Chuffart
  		c(1,1,1)
1075 6b5a528c Florent Chuffart
  		)	
1076 6b5a528c Florent Chuffart
  }
1077 6b5a528c Florent Chuffart
	for (manip in manips) {
1078 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
1079 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
1080 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
1081 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
1082 6b5a528c Florent Chuffart
		} else {
1083 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1084 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1085 6b5a528c Florent Chuffart
			}
1086 6b5a528c Florent Chuffart
		}
1087 6b5a528c Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))				
1088 6b5a528c Florent Chuffart
		for (strain in combi) {			
1089 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1090 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1091 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1092 6b5a528c Florent Chuffart
		}
1093 6b5a528c Florent Chuffart
	}
1094 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1095 6b5a528c Florent Chuffart
	return(snep_design)	
1096 6b5a528c Florent Chuffart
}
1097 6b5a528c Florent Chuffart
1098 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1099 6b5a528c Florent Chuffart
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1100 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1101 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1102 6b5a528c Florent Chuffart
res, ##<< Data
1103 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1104 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1105 6b5a528c Florent Chuffart
) {			
1106 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1107 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1108 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1109 6b5a528c Florent Chuffart
		row.names = cols,
1110 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1111 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1112 6b5a528c Florent Chuffart
		)
1113 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1114 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1115 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1116 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1117 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1118 6b5a528c Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))				
1119 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1120 6b5a528c Florent Chuffart
	}
1121 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1122 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1123 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1124 6b5a528c Florent Chuffart
	}
1125 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1126 6b5a528c Florent Chuffart
}
1127 6b5a528c Florent Chuffart
1128 6b5a528c Florent Chuffart
analyse_design = function(# Launch deseq methods.
1129 6b5a528c Florent Chuffart
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1130 6b5a528c Florent Chuffart
snep_design, ##<< The design to considere.
1131 6b5a528c Florent Chuffart
reads ##<< The data to considere. 
1132 6b5a528c Florent Chuffart
) {
1133 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1134 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1135 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1136 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1137 6b5a528c Florent Chuffart
1138 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1139 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1140 6b5a528c Florent Chuffart
1141 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1142 6b5a528c Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))	
1143 6b5a528c Florent Chuffart
}
1144 6b5a528c Florent Chuffart
1145 6b5a528c Florent Chuffart
1146 6b5a528c Florent Chuffart
1147 6b5a528c Florent Chuffart
1148 6b5a528c Florent Chuffart
1149 6b5a528c Florent Chuffart
1150 6b5a528c Florent Chuffart
1151 6b5a528c Florent Chuffart
1152 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1153 6b5a528c Florent Chuffart
### This function uses 
1154 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1155 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1156 6b5a528c Florent Chuffart
form, ##<< the nuc form involved. 
1157 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1158 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1159 6b5a528c Florent Chuffart
) {
1160 6b5a528c Florent Chuffart
  # PRETREAT
1161 6b5a528c Florent Chuffart
  d = get_design(marker, combi, all_samples)
1162 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1163 6b5a528c Florent Chuffart
  # RUN ANALYSE
1164 6b5a528c Florent Chuffart
  tmp_analyse = analyse_design(d, reads)
1165 6b5a528c Florent Chuffart
  # RESULTS
1166 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1167 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1168 6b5a528c Florent Chuffart
  k = names(fit1)
1169 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1170 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1171 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1172 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1173 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1174 6b5a528c Florent Chuffart
  print(snep_design)
1175 6b5a528c Florent Chuffart
	fdr = 0.0001
1176 6b5a528c Florent Chuffart
	thres = FDR(reads$pvalsGLM, fdr) 
1177 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1178 6b5a528c Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1179 6b5a528c Florent Chuffart
  return(reads)
1180 6b5a528c Florent Chuffart
  },  ex=function(){	
1181 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1182 6b5a528c Florent Chuffart
    combi = c("BY", "YJM") 
1183 6b5a528c Florent Chuffart
    form = "wpfuzzy" # "wp" | "fuzzy" | "wpfuzzy"
1184 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1185 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1186 6b5a528c Florent Chuffart
})
1187 6b5a528c Florent Chuffart
1188 6b5a528c Florent Chuffart
1189 6b5a528c Florent Chuffart
1190 6b5a528c Florent Chuffart
1191 6b5a528c Florent Chuffart
1192 6b5a528c Florent Chuffart
1193 6b5a528c Florent Chuffart
1194 6b5a528c Florent Chuffart
1195 6b5a528c Florent Chuffart
1196 6b5a528c Florent Chuffart
1197 6b5a528c Florent Chuffart
1198 6b5a528c Florent Chuffart
1199 6b5a528c Florent Chuffart
1200 6b5a528c Florent Chuffart
1201 6b5a528c Florent Chuffart
1202 6b5a528c Florent Chuffart
1203 6b5a528c Florent Chuffart
1204 6b5a528c Florent Chuffart
1205 6b5a528c Florent Chuffart
1206 6b5a528c Florent Chuffart
1207 6b5a528c Florent Chuffart
1208 6b5a528c Florent Chuffart
1209 6b5a528c Florent Chuffart
1210 6b5a528c Florent Chuffart
1211 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1212 1d833b97 Florent Chuffart
### Util to convert Roman to Arabic
1213 1d833b97 Florent Chuffart
){list(
1214 1d833b97 Florent Chuffart
  "I" = 1,
1215 1d833b97 Florent Chuffart
  "II" = 2,
1216 1d833b97 Florent Chuffart
  "III" = 3,
1217 1d833b97 Florent Chuffart
  "IV" = 4,
1218 1d833b97 Florent Chuffart
  "V" = 5,
1219 1d833b97 Florent Chuffart
  "VI" = 6,
1220 1d833b97 Florent Chuffart
  "VII" = 7,
1221 1d833b97 Florent Chuffart
  "VIII" = 8,
1222 1d833b97 Florent Chuffart
  "IX" = 9,
1223 1d833b97 Florent Chuffart
  "X" = 10,
1224 1d833b97 Florent Chuffart
  "XI" = 11,
1225 1d833b97 Florent Chuffart
  "XII" = 12,
1226 1d833b97 Florent Chuffart
  "XIII" = 13,
1227 1d833b97 Florent Chuffart
  "XIV" = 14,
1228 1d833b97 Florent Chuffart
  "XV" = 15,
1229 1d833b97 Florent Chuffart
  "XVI" = 16,
1230 1d833b97 Florent Chuffart
  "XVII" = 17,
1231 1d833b97 Florent Chuffart
  "XVIII" = 18,
1232 1d833b97 Florent Chuffart
  "XIX" = 19,
1233 1d833b97 Florent Chuffart
  "XX" = 20
1234 1d833b97 Florent Chuffart
)}
1235 1d833b97 Florent Chuffart
1236 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1237 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1238 1d833b97 Florent Chuffart
l ##<< The pairlist to switch. 
1239 1d833b97 Florent Chuffart
) {
1240 1d833b97 Florent Chuffart
	ret = list()
1241 1d833b97 Florent Chuffart
	for (name in names(l)) {
1242 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1243 1d833b97 Florent Chuffart
	}
1244 1d833b97 Florent Chuffart
	ret
1245 1d833b97 Florent Chuffart
### The switched pairlist.
1246 1d833b97 Florent Chuffart
}, ex=function(){
1247 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1248 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1249 1d833b97 Florent Chuffart
})
1250 1d833b97 Florent Chuffart
1251 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1252 1d833b97 Florent Chuffart
### Util to convert Arabicto Roman
1253 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1254 1d833b97 Florent Chuffart
1255 1d833b97 Florent Chuffart
1256 1d833b97 Florent Chuffart
1257 1d833b97 Florent Chuffart
1258 1d833b97 Florent Chuffart
translate_roi = structure(function(# Translate coords of a genome region.
1259 1d833b97 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1260 1d833b97 Florent Chuffart
roi, ##<< Original genome region of interest.
1261 1d833b97 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1262 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1263 1d833b97 Florent Chuffart
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1264 1d833b97 Florent Chuffart
) {
1265 1d833b97 Florent Chuffart
	strain1 = roi$strain_ref
1266 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1267 1d833b97 Florent Chuffart
	if (strain1 == strain2) {
1268 1d833b97 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1269 1d833b97 Florent Chuffart
		return(roi)
1270 1d833b97 Florent Chuffart
	}
1271 1d833b97 Florent Chuffart
	# Launch c2c file
1272 1d833b97 Florent Chuffart
	if (reverse) {
1273 1d833b97 Florent Chuffart
		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1274 1d833b97 Florent Chuffart
	} else {
1275 1d833b97 Florent Chuffart
		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1276 1d833b97 Florent Chuffart
	}
1277 1d833b97 Florent Chuffart
	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1278 1d833b97 Florent Chuffart
	# filtering it
1279 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1280 1d833b97 Florent Chuffart
	# Reverse
1281 1d833b97 Florent Chuffart
	if (reverse) {
1282 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1283 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1284 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1285 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1286 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1287 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1288 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1289 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1290 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1291 1d833b97 Florent Chuffart
	}
1292 1d833b97 Florent Chuffart
	# Restrict c2c to big_roi
1293 1d833b97 Florent Chuffart
	# if (FALSE) {
1294 1d833b97 Florent Chuffart
	if (!is.null(big_roi)) {
1295 62d69d53 Florent Chuffart
		if (roi$strain_ref != big_roi$strain_ref) {
1296 1e34bb1f Florent Chuffart
      # print("WARNING, big_roi and roi not in the same strain_ref... translating big_roi.")
1297 62d69d53 Florent Chuffart
      big_roi = translate_roi(big_roi, roi$strain_ref, config=config)
1298 62d69d53 Florent Chuffart
    }
1299 62d69d53 Florent Chuffart
    if (big_roi$end < big_roi$begin) {
1300 62d69d53 Florent Chuffart
      tmp_var = big_roi$begin
1301 62d69d53 Florent Chuffart
      big_roi$begin = big_roi$end 
1302 62d69d53 Florent Chuffart
      big_roi$end = tmp_var        
1303 62d69d53 Florent Chuffart
      big_roi$length = big_roi$length        
1304 62d69d53 Florent Chuffart
    }    
1305 ded736c8 Florent Chuffart
    if (big_roi$chr!=roi$chr | roi$end > big_roi$end | roi$end < big_roi$begin | roi$begin > big_roi$end | roi$begin < big_roi$begin) {
1306 ded736c8 Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_roi.")
1307 ded736c8 Florent Chuffart
      return(NULL)
1308 ded736c8 Florent Chuffart
    }
1309 62d69d53 Florent Chuffart
		if (strain1 == "BY") {
1310 62d69d53 Florent Chuffart
			big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="")
1311 62d69d53 Florent Chuffart
		} else if (strain1 == "RM") {			
1312 62d69d53 Florent Chuffart
		  big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="")
1313 62d69d53 Florent Chuffart
		} else if (strain1 == "YJM") {			
1314 62d69d53 Florent Chuffart
		  big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]]
1315 1d833b97 Florent Chuffart
		}
1316 62d69d53 Florent Chuffart
		big_begin_1 = big_roi$begin
1317 62d69d53 Florent Chuffart
	  big_end_1 = big_roi$end
1318 62d69d53 Florent Chuffart
		c2c = c2c[c2c$V1==big_chro_1,]
1319 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3,c("V2", "V3") ] = big_begin_1}
1320 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, c("V2", "V3")] = big_end_1}
1321 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2,c("V2", "V3") ] = big_begin_1}
1322 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, c("V2", "V3")] = big_end_1}
1323 62d69d53 Florent Chuffart
    c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1324 1d833b97 Florent Chuffart
	}
1325 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1326 1d833b97 Florent Chuffart
	if (strain1 == "BY") {
1327 1d833b97 Florent Chuffart
		chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="")
1328 1d833b97 Florent Chuffart
	} else if (strain1 == "RM") {			
1329 1d833b97 Florent Chuffart
	  chro_1 = paste("supercontig_1.",roi$chr,sep="")
1330 1d833b97 Florent Chuffart
	} else if (strain1 == "YJM") {			
1331 1d833b97 Florent Chuffart
	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]]
1332 1d833b97 Florent Chuffart
	}
1333 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1334 1d833b97 Florent Chuffart
  end_1 = roi$end
1335 1d833b97 Florent Chuffart
  # Computing equivalent strain_2 alignment coordinates	
1336 1d833b97 Florent Chuffart
	if (reverse) {
1337 1d833b97 Florent Chuffart
  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1338 1d833b97 Florent Chuffart
    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1339 1d833b97 Florent Chuffart
	} else {
1340 1d833b97 Florent Chuffart
		tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1341 1d833b97 Florent Chuffart
	  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1342 1d833b97 Florent Chuffart
	}
1343 1d833b97 Florent Chuffart
	# Never happend conditions ...	
1344 1d833b97 Florent Chuffart
	{
1345 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1346 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1347 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1348 54cc8283 Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
1349 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1350 54cc8283 Florent Chuffart
      } else {
1351 5a26597a Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1,]
1352 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1353 54cc8283 Florent Chuffart
      }
1354 1d833b97 Florent Chuffart
			if (reverse) {
1355 1d833b97 Florent Chuffart
		  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1356 1d833b97 Florent Chuffart
			} else {
1357 1d833b97 Florent Chuffart
				tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1358 1d833b97 Florent Chuffart
			}
1359 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1360 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1361 1d833b97 Florent Chuffart
					return(NULL)
1362 1d833b97 Florent Chuffart
					tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
1363 1d833b97 Florent Chuffart
				} else { 
1364 1d833b97 Florent Chuffart
					# return(NULL)
1365 1d833b97 Florent Chuffart
					# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,])
1366 1d833b97 Florent Chuffart
					# print(c2c[c2c$V1==chro_1,])
1367 1d833b97 Florent Chuffart
					print(tmptransfostart)
1368 1d833b97 Florent Chuffart
					print(tmptransfostop)
1369 1d833b97 Florent Chuffart
					stop("Never happend condition 1.")					
1370 1d833b97 Florent Chuffart
				}
1371 1d833b97 Florent Chuffart
			}
1372 1d833b97 Florent Chuffart
		} 
1373 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1374 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1375 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1376 54cc8283 Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
1377 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1378 54cc8283 Florent Chuffart
      } else {
1379 5a26597a Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2<=end_1,]
1380 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1381 54cc8283 Florent Chuffart
      }
1382 1d833b97 Florent Chuffart
			if (reverse) {
1383 1d833b97 Florent Chuffart
		    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1384 1d833b97 Florent Chuffart
			} else {
1385 1d833b97 Florent Chuffart
			  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1386 1d833b97 Florent Chuffart
			}
1387 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1388 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1389 1d833b97 Florent Chuffart
					return(NULL)
1390 1d833b97 Florent Chuffart
				  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
1391 1d833b97 Florent Chuffart
				} else {
1392 1d833b97 Florent Chuffart
					# return(NULL)
1393 1d833b97 Florent Chuffart
					print(c2c[c2c$V1==chro_1,])
1394 1d833b97 Florent Chuffart
					print(tmptransfostart)
1395 1d833b97 Florent Chuffart
					print(tmptransfostop)
1396 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1397 1d833b97 Florent Chuffart
				} 
1398 1d833b97 Florent Chuffart
			} 
1399 1d833b97 Florent Chuffart
		}
1400 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1401 1d833b97 Florent Chuffart
			# tmptransfostart = tmptransfostart[1,]
1402 1d833b97 Florent Chuffart
			# print("many start")
1403 1d833b97 Florent Chuffart
			# print(c2c[c2c$V1==chro_1,])
1404 1d833b97 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1405 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1406 1d833b97 Florent Chuffart
				# return(NULL)
1407 1d833b97 Florent Chuffart
				print(tmptransfostart)
1408 1d833b97 Florent Chuffart
				print(tmptransfostop)
1409 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1410 1d833b97 Florent Chuffart
			}
1411 1d833b97 Florent Chuffart
		}
1412 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1413 1d833b97 Florent Chuffart
			# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),]
1414 1d833b97 Florent Chuffart
			# print("many stop")
1415 1d833b97 Florent Chuffart
			# print(tmptransfostop)
1416 1d833b97 Florent Chuffart
			# print(roi)
1417 1d833b97 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1418 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1419 1d833b97 Florent Chuffart
				# return(NULL)
1420 1d833b97 Florent Chuffart
				print(tmptransfostart)
1421 1d833b97 Florent Chuffart
				print(tmptransfostop)
1422 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1423 1d833b97 Florent Chuffart
			}
1424 1d833b97 Florent Chuffart
		}		
1425 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1426 1d833b97 Florent Chuffart
			print(tmptransfostart)
1427 1d833b97 Florent Chuffart
			print(tmptransfostop)		
1428 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1429 1d833b97 Florent Chuffart
		}
1430 1d833b97 Florent Chuffart
	} 
1431 1d833b97 Florent Chuffart
  # Deal with strand
1432 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1433 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1434 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1435 1d833b97 Florent Chuffart
  } else {
1436 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1437 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1438 1d833b97 Florent Chuffart
  }
1439 1d833b97 Florent Chuffart
	# Build returned roi
1440 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1441 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1442 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1443 1d833b97 Florent Chuffart
	} else {
1444 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1445 1d833b97 Florent Chuffart
	}
1446 1d833b97 Florent Chuffart
  roi$begin = begin_2
1447 1d833b97 Florent Chuffart
  roi$end = end_2
1448 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1449 1d833b97 Florent Chuffart
		roi$length = 1			
1450 1d833b97 Florent Chuffart
	} else {		
1451 1d833b97 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1452 1d833b97 Florent Chuffart
	}
1453 1d833b97 Florent Chuffart
  return(roi)  
1454 1d833b97 Florent Chuffart
}, ex=function(){
1455 1d833b97 Florent Chuffart
	# Define new translate_roi function...
1456 1d833b97 Florent Chuffart
	translate_roi = function(roi, strain2, config) {
1457 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1458 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1459 1d833b97 Florent Chuffart
			return(roi)
1460 1d833b97 Florent Chuffart
		} else {
1461 1d833b97 Florent Chuffart
		  stop("Here is my new translate_roi function...")		
1462 1d833b97 Florent Chuffart
		}	
1463 1d833b97 Florent Chuffart
	}
1464 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1465 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", as.environment("package:nm"))
1466 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", getNamespace("nm"))
1467 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, "package:nm")
1468 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, getNamespace("nm"))
1469 1d833b97 Florent Chuffart
	# lockBinding("translate_roi", getNamespace("nm"))
1470 1d833b97 Florent Chuffart
	# lockBinding("translate_roi", as.environment("package:nm"))	
1471 1d833b97 Florent Chuffart
})
1472 1d833b97 Florent Chuffart
1473 1d833b97 Florent Chuffart
1474 1d833b97 Florent Chuffart
1475 1d833b97 Florent Chuffart
1476 1d833b97 Florent Chuffart
1477 1d833b97 Florent Chuffart
1478 1d833b97 Florent Chuffart
1479 1d833b97 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1480 1d833b97 Florent Chuffart
### CURs are regions that can be aligned between the genomes 
1481 1d833b97 Florent Chuffart
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
1482 1d833b97 Florent Chuffart
min_cur_width = 200, ##<< The minimum width of a CUR
1483 8e9facd8 Florent Chuffart
config = NULL, ##<< GLOBAL config variable
1484 1d833b97 Florent Chuffart
plot = FALSE ##<< Plot CURs or not
1485 1d833b97 Florent Chuffart
) {
1486 1d833b97 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
1487 8e9facd8 Florent Chuffart
  	c2c = get_content(config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]], "table", stringsAsFactors=FALSE)
1488 1d833b97 Florent Chuffart
    # Filtering unagapped
1489 1d833b97 Florent Chuffart
    c2c = c2c[c2c$V6=="-",]
1490 1d833b97 Florent Chuffart
    # filtering some things (chr...)
1491 1d833b97 Florent Chuffart
    # c2c = c2c[c2c$V1 == "chrIV",]
1492 1d833b97 Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] 
1493 1d833b97 Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] 
1494 1d833b97 Florent Chuffart
  	# Plot diffs to define a threshold (diff_allowed)
1495 1d833b97 Florent Chuffart
  	# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
1496 1d833b97 Florent Chuffart
    # Filtering
1497 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1498 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1499 1d833b97 Florent Chuffart
1500 1d833b97 Florent Chuffart
    rois = NULL	
1501 1d833b97 Florent Chuffart
  	for(i in 1:length(indexes_start)) {
1502 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1503 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1504 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1505 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1506 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1507 1d833b97 Florent Chuffart
  		} else {			
1508 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1509 1d833b97 Florent Chuffart
  		}
1510 1d833b97 Florent Chuffart
  		roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
1511 1d833b97 Florent Chuffart
  		roi[["length"]] = roi$end - roi$begin
1512 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1513 1d833b97 Florent Chuffart
        rois = dfadd(rois,roi)
1514 1d833b97 Florent Chuffart
  	  }
1515 1d833b97 Florent Chuffart
  		if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1516 1d833b97 Florent Chuffart
  			print("*************** ERROR, non homogenous region! ********************")
1517 1d833b97 Florent Chuffart
  		}
1518 1d833b97 Florent Chuffart
  		# print(i)
1519 1d833b97 Florent Chuffart
  		# print(roi)
1520 1d833b97 Florent Chuffart
  		# print(sub_c2c)
1521 1d833b97 Florent Chuffart
  		# print("________________________________________________________________")
1522 1d833b97 Florent Chuffart
  	}
1523 1d833b97 Florent Chuffart
  	if (plot) {
1524 1d833b97 Florent Chuffart
  		print(paste(length(indexes_stop), "area of interest."))
1525 1d833b97 Florent Chuffart
  	  # Plot rois
1526 8e9facd8 Florent Chuffart
  	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[strain1]], "fasta")   
1527 1d833b97 Florent Chuffart
  		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1528 1d833b97 Florent Chuffart
  		for (name in names(genome)) {
1529 1d833b97 Florent Chuffart
  			if (strain1 == "BY") {
1530 1d833b97 Florent Chuffart
  				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
1531 1d833b97 Florent Chuffart
  			} else {			
1532 1d833b97 Florent Chuffart
  				chr_ref = name
1533 1d833b97 Florent Chuffart
  			}
1534 1d833b97 Florent Chuffart
  			y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
1535 1d833b97 Florent Chuffart
  			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1536 1d833b97 Florent Chuffart
  			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1537 1d833b97 Florent Chuffart
  		}
1538 1d833b97 Florent Chuffart
  	  col=1
1539 1d833b97 Florent Chuffart
  	  for (roi_index in 1:length(rois$chr)) {
1540 1d833b97 Florent Chuffart
  			roi = rois[roi_index,]	
1541 1d833b97 Florent Chuffart
  			y_lev = as.integer(roi$chr) + 0.3
1542 1d833b97 Florent Chuffart
  			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1543 1d833b97 Florent Chuffart
  			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1544 1d833b97 Florent Chuffart
  	  	col = col + 1
1545 1d833b97 Florent Chuffart
  	  }	
1546 1d833b97 Florent Chuffart
  	}
1547 1d833b97 Florent Chuffart
  	return(rois)
1548 1d833b97 Florent Chuffart
  }
1549 1d833b97 Florent Chuffart
	rois = NULL	
1550 1d833b97 Florent Chuffart
	rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1551 1d833b97 Florent Chuffart
	rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1552 1d833b97 Florent Chuffart
	for (roi_1_index in 1:length(rois_BY_RM[,1])) {
1553 1d833b97 Florent Chuffart
		roi_1 = rois_BY_RM[roi_1_index,]
1554 1d833b97 Florent Chuffart
		roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;  
1555 1d833b97 Florent Chuffart
		# print(length(roi_2_candidates[,1]))
1556 1d833b97 Florent Chuffart
		if (length(roi_2_candidates[,1]) > 0) {
1557 1d833b97 Florent Chuffart
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1558 1d833b97 Florent Chuffart
				roi_2 = roi_2_candidates[roi_2_index,]
1559 1d833b97 Florent Chuffart
				roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
1560 1d833b97 Florent Chuffart
				roi[["length"]] = roi$end - roi$begin + 1
1561 1d833b97 Florent Chuffart
				if (roi$length >= min_cur_width) {
1562 1d833b97 Florent Chuffart
					# if (length(rois[,1]) == 153) {
1563 1d833b97 Florent Chuffart
					# 	print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
1564 1d833b97 Florent Chuffart
					# 	print(roi_1)
1565 1d833b97 Florent Chuffart
					# 	print(roi_2)
1566 1d833b97 Florent Chuffart
					# 	print(roi)
1567 1d833b97 Florent Chuffart
					# }
1568 1d833b97 Florent Chuffart
			    rois = dfadd(rois,roi)
1569 1d833b97 Florent Chuffart
			  }			
1570 1d833b97 Florent Chuffart
			}
1571 1d833b97 Florent Chuffart
		}
1572 1d833b97 Florent Chuffart
	}
1573 1d833b97 Florent Chuffart
	print(length(rois[,1]))
1574 1d833b97 Florent Chuffart
	print(sum(rois$length))
1575 1d833b97 Florent Chuffart
	rois_1st_round = rois
1576 1d833b97 Florent Chuffart
	rois_2nd_round = NULL	
1577 1d833b97 Florent Chuffart
	rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1578 1d833b97 Florent Chuffart
	for (roi_1_index in 1:length(rois_1st_round[,1])) {
1579 1d833b97 Florent Chuffart
		roi_1 = rois_1st_round[roi_1_index,]
1580 8e9facd8 Florent Chuffart
		translated_roi_1 = translate_roi(roi_1, "RM", config = config)
1581 1d833b97 Florent Chuffart
		t_b = min(translated_roi_1$begin, translated_roi_1$end)
1582 1d833b97 Florent Chuffart
		t_e = max(translated_roi_1$begin, translated_roi_1$end)
1583 1d833b97 Florent Chuffart
		roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;  
1584 1d833b97 Florent Chuffart
		if (length(roi_2_candidates[,1]) > 0) {
1585 1d833b97 Florent Chuffart
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1586 1d833b97 Florent Chuffart
				roi_2 = roi_2_candidates[roi_2_index,]
1587 1d833b97 Florent Chuffart
				roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
1588 1d833b97 Florent Chuffart
				roi[["length"]] = roi$end - roi$begin + 1
1589 1d833b97 Florent Chuffart
				if (roi$length >= min_cur_width) {
1590 1d833b97 Florent Chuffart
			    rois_2nd_round = dfadd(rois_2nd_round,roi)
1591 1d833b97 Florent Chuffart
			  }			
1592 1d833b97 Florent Chuffart
			}
1593 1d833b97 Florent Chuffart
		}
1594 1d833b97 Florent Chuffart
	}
1595 1d833b97 Florent Chuffart
	print(length(rois_2nd_round[,1]))
1596 1d833b97 Florent Chuffart
	print(sum(rois_2nd_round$length))
1597 1d833b97 Florent Chuffart
	rois = rois_2nd_round
1598 1d833b97 Florent Chuffart
1599 1d833b97 Florent Chuffart
	rois_translator_round = list()	
1600 1d833b97 Florent Chuffart
	for (roi_index in 1:length(rois[,1])) {
1601 1d833b97 Florent Chuffart
		roi = rois[roi_index,]
1602 8e9facd8 Florent Chuffart
		BY_roi  = translate_roi(roi, "BY", config = config)
1603 1d833b97 Florent Chuffart
		tmp_BY_roi = BY_roi 
1604 1d833b97 Florent Chuffart
		BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1605 1d833b97 Florent Chuffart
		BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1606 1d833b97 Florent Chuffart
		BY_roi$length = abs(BY_roi$length)
1607 1d833b97 Florent Chuffart
		rois_translator_round = dfadd(rois_translator_round, BY_roi)
1608 1d833b97 Florent Chuffart
	}
1609 1d833b97 Florent Chuffart
	rois = rois_translator_round
1610 1d833b97 Florent Chuffart
1611 1d833b97 Florent Chuffart
	rois_3rd_round = NULL	
1612 1d833b97 Florent Chuffart
	for (roi_index in 1:length(rois[,1])) {
1613 1d833b97 Florent Chuffart
	# for (roi_index in 1:2) {
1614 1d833b97 Florent Chuffart
		current_roi = rois[roi_index,]
1615 1d833b97 Florent Chuffart
		# print(roi_index)
1616 1d833b97 Florent Chuffart
	
1617 1d833b97 Florent Chuffart
	  to_be_check_rois = dfadd(NULL, current_roi)
1618 1d833b97 Florent Chuffart
		NEED_RERUN = TRUE
1619 1d833b97 Florent Chuffart
		while (NEED_RERUN) {
1620 1d833b97 Florent Chuffart
			# print("RERUN"),
1621 1d833b97 Florent Chuffart
			NEED_RERUN = FALSE
1622 1d833b97 Florent Chuffart
			to_be_check_again = NULL
1623 1d833b97 Florent Chuffart
			for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
1624 1d833b97 Florent Chuffart
				# print(to_be_check_rois)
1625 1d833b97 Florent Chuffart
				to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
1626 1d833b97 Florent Chuffart
		    combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
1627 1d833b97 Florent Chuffart
			  for (combi in combis) {
1628 1d833b97 Florent Chuffart
					# print(combi)
1629 1d833b97 Florent Chuffart
			    strain1 = combi[1]
1630 1d833b97 Florent Chuffart
		      strain2 = combi[2]
1631 8e9facd8 Florent Chuffart
					trans_roi = translate_roi(to_be_check_roi, strain1, config = config)
1632 1d833b97 Florent Chuffart
					lower_bound=min(trans_roi$begin, trans_roi$end)
1633 1d833b97 Florent Chuffart
					upper_bound=max(trans_roi$begin, trans_roi$end)			
1634 1d833b97 Florent Chuffart
          
1635 1d833b97 Florent Chuffart
          check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
1636 1d833b97 Florent Chuffart
            reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1637 1d833b97 Florent Chuffart
          	if (strain1 == strain2) {
1638 1d833b97 Florent Chuffart
          		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1639 1d833b97 Florent Chuffart
          		return(roi)
1640 1d833b97 Florent Chuffart
          	}
1641 1d833b97 Florent Chuffart
          	# Launch c2c file
1642 1d833b97 Florent Chuffart
          	if (reverse) {
1643 1d833b97 Florent Chuffart
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1644 1d833b97 Florent Chuffart
          	} else {
1645 1d833b97 Florent Chuffart
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1646 1d833b97 Florent Chuffart
          	}
1647 1d833b97 Florent Chuffart
          	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1648 1d833b97 Florent Chuffart
          	# filtering it
1649 1d833b97 Florent Chuffart
            c2c = c2c[c2c$V6=="-",]
1650 1d833b97 Florent Chuffart
          	# Reverse
1651 1d833b97 Florent Chuffart
          	if (reverse) {
1652 1d833b97 Florent Chuffart
          		tmp_col = c2c$V1
1653 1d833b97 Florent Chuffart
          		c2c$V1 = c2c$V7
1654 1d833b97 Florent Chuffart
          		c2c$V7 = tmp_col
1655 1d833b97 Florent Chuffart
          		tmp_col = c2c$V2
1656 1d833b97 Florent Chuffart
          		c2c$V2 = c2c$V9
1657 1d833b97 Florent Chuffart
          		c2c$V9 = tmp_col
1658 1d833b97 Florent Chuffart
          		tmp_col = c2c$V3
1659 1d833b97 Florent Chuffart
          		c2c$V3 = c2c$V10
1660 1d833b97 Florent Chuffart
          		c2c$V10 = tmp_col
1661 1d833b97 Florent Chuffart
          	}
1662 1d833b97 Florent Chuffart
1663 1d833b97 Florent Chuffart
          	if (strain1 == "BY") {
1664 1d833b97 Florent Chuffart
          		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1665 1d833b97 Florent Chuffart
          	} else if (strain1 == "RM") {			
1666 1d833b97 Florent Chuffart
          	  chro_1 = paste("supercontig_1.",chr,sep="")
1667 1d833b97 Florent Chuffart
          	} else if (strain1 == "YJM") {			
1668 1d833b97 Florent Chuffart
          	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1669 1d833b97 Florent Chuffart
          	}
1670 1d833b97 Florent Chuffart
          	# print(chro_1)
1671 1d833b97 Florent Chuffart
          	if (!is.null(lower_bound) & !is.null(upper_bound)) {
1672 1d833b97 Florent Chuffart
              if (reverse) {
1673 1d833b97 Florent Chuffart
          	  	tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
1674 1d833b97 Florent Chuffart
          		} else {
1675 1d833b97 Florent Chuffart
          			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
1676 1d833b97 Florent Chuffart
          		}
1677 1d833b97 Florent Chuffart
          	} else {
1678 1d833b97 Florent Chuffart
            	tmp_c2c = c2c[c2c$V1 == chr,]
1679 1d833b97 Florent Chuffart
          	}
1680 1d833b97 Florent Chuffart
          	if (length(tmp_c2c[,1]) > 1) {
1681 1d833b97 Florent Chuffart
          		pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
1682 1d833b97 Florent Chuffart
          			# print(paste(i, "/", length(tmp_c2c[,1])))
1683 1d833b97 Florent Chuffart
          			apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
1684 1d833b97 Florent Chuffart
          				l1 = tmp_c2c[i,] 
1685 1d833b97 Florent Chuffart
          				b1 = min(l1$V2, l1$V3)
1686 1d833b97 Florent Chuffart
          				e1 = max(l1$V2, l1$V3)
1687 1d833b97 Florent Chuffart
          				l2 = tmp_c2c[j,]
1688 1d833b97 Florent Chuffart
          				b2 = min(l2$V2, l2$V3)
1689 1d833b97 Florent Chuffart
          				e2 = max(l2$V2, l2$V3)
1690 1d833b97 Florent Chuffart
          				if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
1691 1d833b97 Florent Chuffart
          					print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))				
1692 1d833b97 Florent Chuffart
          					pb = list(strain1, strain2, chr, b1, e1, b2, e2)					
1693 1d833b97 Florent Chuffart
          					pb
1694 1d833b97 Florent Chuffart
          				} else {
1695 1d833b97 Florent Chuffart
          					NULL
1696 1d833b97 Florent Chuffart
          				}
1697 1d833b97 Florent Chuffart
          			})
1698 1d833b97 Florent Chuffart
          		})
1699 1d833b97 Florent Chuffart
          		return(pbs)
1700 1d833b97 Florent Chuffart
          	}
1701 1d833b97 Florent Chuffart
1702 1d833b97 Florent Chuffart
          }, ex=function(){
1703 1d833b97 Florent Chuffart
          	source("src/nucleo_miner/yeast_strain_conversion.R"); 
1704 1d833b97 Florent Chuffart
          	pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
1705 1d833b97 Florent Chuffart
          	pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
1706 1d833b97 Florent Chuffart
          	pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
1707 1d833b97 Florent Chuffart
          	pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
1708 1d833b97 Florent Chuffart
          	pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
1709 1d833b97 Florent Chuffart
          	pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
1710 1d833b97 Florent Chuffart
          })
1711 1d833b97 Florent Chuffart
          
1712 1d833b97 Florent Chuffart
1713 1d833b97 Florent Chuffart
					res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) 
1714 1d833b97 Florent Chuffart
					if (!is.null(res)) {
1715 1d833b97 Florent Chuffart
						df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)		
1716 1d833b97 Florent Chuffart
						interval = df_res[1,]
1717 1d833b97 Florent Chuffart
						inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
1718 1d833b97 Florent Chuffart
						inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
1719 1d833b97 Florent Chuffart
						# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))			
1720 1d833b97 Florent Chuffart
						new_roi1 = trans_roi
1721 1d833b97 Florent Chuffart
						new_roi2 = trans_roi
1722 1d833b97 Florent Chuffart
						new_roi1$begin = lower_bound
1723 1d833b97 Florent Chuffart
						new_roi1$end = inter_min - 1
1724 1d833b97 Florent Chuffart
						new_roi1$length = new_roi1$end - new_roi1$begin + 1
1725 1d833b97 Florent Chuffart
						new_roi2$begin = inter_max + 1
1726 1d833b97 Florent Chuffart
						new_roi2$end = upper_bound
1727 1d833b97 Florent Chuffart
						new_roi2$length = new_roi2$end - new_roi2$begin + 1
1728 1d833b97 Florent Chuffart
						if (new_roi1$length > min_cur_width) {
1729 8e9facd8 Florent Chuffart
							BY_roi  = translate_roi(new_roi1, "BY", config = config)
1730 1d833b97 Florent Chuffart
							tmp_BY_roi = BY_roi
1731 1d833b97 Florent Chuffart
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1732 1d833b97 Florent Chuffart
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1733 1d833b97 Florent Chuffart
							BY_roi$length = abs(BY_roi$length)
1734 1d833b97 Florent Chuffart
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1735 1d833b97 Florent Chuffart
						}
1736 1d833b97 Florent Chuffart
						if (new_roi2$length > min_cur_width) {
1737 8e9facd8 Florent Chuffart
							BY_roi  = translate_roi(new_roi2, "BY", config = config)
1738 1d833b97 Florent Chuffart
							tmp_BY_roi = BY_roi
1739 1d833b97 Florent Chuffart
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1740 1d833b97 Florent Chuffart
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1741 1d833b97 Florent Chuffart
							BY_roi$length = abs(BY_roi$length)
1742 1d833b97 Florent Chuffart
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1743 1d833b97 Florent Chuffart
						}
1744 1d833b97 Florent Chuffart
						if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
1745 1d833b97 Florent Chuffart
							for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
1746 1d833b97 Florent Chuffart
								to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])								
1747 1d833b97 Florent Chuffart
							}
1748 1d833b97 Florent Chuffart
						}
1749 1d833b97 Florent Chuffart
						NEED_RERUN = TRUE
1750 1d833b97 Florent Chuffart
						break
1751 1d833b97 Florent Chuffart
					}
1752 1d833b97 Florent Chuffart
				}
1753 1d833b97 Florent Chuffart
				if (NEED_RERUN) {
1754 1d833b97 Florent Chuffart
					to_be_check_rois = to_be_check_again
1755 1d833b97 Florent Chuffart
					break
1756 1d833b97 Florent Chuffart
				}
1757 1d833b97 Florent Chuffart
			}
1758 1d833b97 Florent Chuffart
		}
1759 1d833b97 Florent Chuffart
		
1760 1d833b97 Florent Chuffart
		checked_rois = to_be_check_rois
1761 1d833b97 Florent Chuffart
		for (checked_roi_index in 1:length(checked_rois[,1])) {
1762 1d833b97 Florent Chuffart
			rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])	
1763 1d833b97 Florent Chuffart
		}
1764 1d833b97 Florent Chuffart
	}
1765 1d833b97 Florent Chuffart
1766 729c934e Florent Chuffart
1767 1d833b97 Florent Chuffart
	print(length(rois_3rd_round[,1]))
1768 1d833b97 Florent Chuffart
	print(sum(rois_3rd_round$length))
1769 1d833b97 Florent Chuffart
	rois = rois_3rd_round
1770 1d833b97 Florent Chuffart
	
1771 1d833b97 Florent Chuffart
	
1772 1d833b97 Florent Chuffart
	if (plot) {
1773 1d833b97 Florent Chuffart
		print(paste(length(rois$chr), "area of interest."))
1774 1d833b97 Florent Chuffart
	  # Plot rois
1775 8e9facd8 Florent Chuffart
	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[["BY"]], "fasta")   
1776 1d833b97 Florent Chuffart
		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1777 1d833b97 Florent Chuffart
		for (name in names(genome)) {
1778 1d833b97 Florent Chuffart
			if (TRUE) {
1779 1d833b97 Florent Chuffart
				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
1780 1d833b97 Florent Chuffart
			} else {			
1781 1d833b97 Florent Chuffart
				chr_ref = name
1782 1d833b97 Florent Chuffart
			}
1783 1d833b97 Florent Chuffart
			y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
1784 1d833b97 Florent Chuffart
			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1785 1d833b97 Florent Chuffart
			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1786 1d833b97 Florent Chuffart
		}
1787 1d833b97 Florent Chuffart
	  col=1
1788 1d833b97 Florent Chuffart
	  for (roi_index in 1:length(rois$chr)) {
1789 1d833b97 Florent Chuffart
			roi = rois[roi_index,]	
1790 1d833b97 Florent Chuffart
			y_lev = as.integer(roi$chr) + 0.3
1791 1d833b97 Florent Chuffart
			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1792 1d833b97 Florent Chuffart
			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1793 1d833b97 Florent Chuffart
	  	col = col + 1
1794 1d833b97 Florent Chuffart
	  }	
1795 1d833b97 Florent Chuffart
	}
1796 1d833b97 Florent Chuffart
	return (rois)
1797 1d833b97 Florent Chuffart
}
1798 1d833b97 Florent Chuffart
1799 1d833b97 Florent Chuffart
1800 1d833b97 Florent Chuffart
1801 1d833b97 Florent Chuffart
1802 61914235 Florent Chuffart
build_replicates = structure(function(# Stage replicates data 
1803 61914235 Florent Chuffart
### This function loads in memory data corresponding to the given experiments.
1804 61914235 Florent Chuffart
expe, ##<< a list of vector corresponding to vector of replicates.
1805 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1806 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1807 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1808 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1809 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1810 61914235 Florent Chuffart
) {
1811 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1812 61914235 Florent Chuffart
  	samples=list()
1813 61914235 Florent Chuffart
  	for (i in samples_ids) {
1814 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1815 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1816 8e9facd8 Florent Chuffart
      sample$roi = translate_roi(roi, sample$strain, config = config)
1817 61914235 Florent Chuffart
  		if (get_genome) {
1818 61914235 Florent Chuffart
  			# Get Genome
1819 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1820 61914235 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
1821 61914235 Florent Chuffart
  		}
1822 61914235 Florent Chuffart
  		# Get inputs
1823 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1824 61914235 Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE) 
1825 61914235 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4]) 	
1826 61914235 Florent Chuffart
  		if (!only_fetch) {
1827 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1828 61914235 Florent Chuffart
  	  }
1829 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1830 61914235 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
1831 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1832 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1833 61914235 Florent Chuffart
  			if (!only_fetch) {
1834 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1835 61914235 Florent Chuffart
    		}
1836 61914235 Florent Chuffart
  		}
1837 61914235 Florent Chuffart
  		samples[[length(samples) + 1]] = sample		
1838 61914235 Florent Chuffart
  	}
1839 61914235 Florent Chuffart
  	return(samples)
1840 61914235 Florent Chuffart
  }
1841 61914235 Florent Chuffart
	replicates = list()
1842 61914235 Florent Chuffart
	for(samples_ids in expe) {
1843 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1844 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1845 61914235 Florent Chuffart
	}
1846 61914235 Florent Chuffart
	return(replicates)
1847 61914235 Florent Chuffart
  }, ex = function() {
1848 61914235 Florent Chuffart
    # library(rjson)
1849 61914235 Florent Chuffart
    # library(nucleominer)
1850 61914235 Florent Chuffart
    # 
1851 61914235 Florent Chuffart
    # # Read config file
1852 61914235 Florent Chuffart
    # json_conf_file = "nucleo_miner_config.json"
1853 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1854 61914235 Florent Chuffart
    # # Read sample file
1855 61914235 Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)  
1856 61914235 Florent Chuffart
    # # here are the sample ids in a list
1857 61914235 Florent Chuffart
    # expes = list(c(1))
1858 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1859 61914235 Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY") 
1860 61914235 Florent Chuffart
    # # it displays the corverage
1861 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1862 61914235 Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH, 
1863 61914235 Florent Chuffart
    #       plot_coverage = TRUE,  
1864 61914235 Florent Chuffart
    #       plot_squared_reads = FALSE,  
1865 61914235 Florent Chuffart
    #       plot_ref_genome = FALSE, 
1866 61914235 Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,  
1867 61914235 Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,  
1868 61914235 Florent Chuffart
    #       plot_gaussian_reads = FALSE,  
1869 61914235 Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,  
1870 61914235 Florent Chuffart
    #       plot_ellipse_nucs = FALSE,  
1871 61914235 Florent Chuffart
    #       plot_wp_nucs = FALSE,  
1872 61914235 Florent Chuffart
    #       plot_wp_nuc_model = FALSE,  
1873 61914235 Florent Chuffart
    #       plot_common_nucs = FALSE,  
1874 61914235 Florent Chuffart
    #       height = 50)
1875 61914235 Florent Chuffart
  })
1876 61914235 Florent Chuffart
1877 61914235 Florent Chuffart
1878 61914235 Florent Chuffart
1879 61914235 Florent Chuffart
1880 61914235 Florent Chuffart
1881 61914235 Florent Chuffart
1882 61914235 Florent Chuffart
1883 61914235 Florent Chuffart
1884 61914235 Florent Chuffart
1885 61914235 Florent Chuffart
1886 61914235 Florent Chuffart
1887 61914235 Florent Chuffart
1888 61914235 Florent Chuffart
1889 61914235 Florent Chuffart
1890 61914235 Florent Chuffart
1891 61914235 Florent Chuffart
1892 61914235 Florent Chuffart
1893 61914235 Florent Chuffart
1894 61914235 Florent Chuffart
1895 61914235 Florent Chuffart
1896 61914235 Florent Chuffart
1897 61914235 Florent Chuffart
1898 61914235 Florent Chuffart
1899 61914235 Florent Chuffart
1900 61914235 Florent Chuffart
1901 61914235 Florent Chuffart
1902 61914235 Florent Chuffart
1903 61914235 Florent Chuffart
1904 61914235 Florent Chuffart
1905 61914235 Florent Chuffart
1906 61914235 Florent Chuffart
1907 61914235 Florent Chuffart
1908 61914235 Florent Chuffart
1909 1d833b97 Florent Chuffart
1910 1d833b97 Florent Chuffart
1911 1d833b97 Florent Chuffart
1912 1d833b97 Florent Chuffart
1913 1d833b97 Florent Chuffart
1914 1d833b97 Florent Chuffart
1915 1d833b97 Florent Chuffart
1916 1d833b97 Florent Chuffart
1917 1d833b97 Florent Chuffart
1918 1d833b97 Florent Chuffart
1919 1d833b97 Florent Chuffart
1920 1d833b97 Florent Chuffart
1921 1d833b97 Florent Chuffart
1922 1d833b97 Florent Chuffart
1923 1d833b97 Florent Chuffart
1924 1d833b97 Florent Chuffart
1925 1d833b97 Florent Chuffart
1926 1d833b97 Florent Chuffart
1927 1d833b97 Florent Chuffart
1928 1d833b97 Florent Chuffart
1929 1d833b97 Florent Chuffart
1930 1d833b97 Florent Chuffart
1931 1d833b97 Florent Chuffart
1932 1d833b97 Florent Chuffart
1933 1d833b97 Florent Chuffart
1934 1d833b97 Florent Chuffart
1935 1d833b97 Florent Chuffart
1936 1d833b97 Florent Chuffart
1937 1d833b97 Florent Chuffart
1938 1d833b97 Florent Chuffart
1939 1d833b97 Florent Chuffart
1940 1d833b97 Florent Chuffart
1941 1d833b97 Florent Chuffart
1942 1d833b97 Florent Chuffart
1943 6b5a528c Florent Chuffart
1944 6b5a528c Florent Chuffart
1945 6b5a528c Florent Chuffart
1946 6b5a528c Florent Chuffart
1947 6b5a528c Florent Chuffart
1948 6b5a528c Florent Chuffart
1949 6b5a528c Florent Chuffart
1950 6b5a528c Florent Chuffart
1951 6b5a528c Florent Chuffart
1952 6b5a528c Florent Chuffart
1953 6b5a528c Florent Chuffart
1954 6b5a528c Florent Chuffart
1955 6b5a528c Florent Chuffart
1956 6b5a528c Florent Chuffart
1957 6b5a528c Florent Chuffart
1958 6b5a528c Florent Chuffart
1959 6b5a528c Florent Chuffart
1960 6b5a528c Florent Chuffart
1961 6b5a528c Florent Chuffart
1962 6b5a528c Florent Chuffart
1963 6b5a528c Florent Chuffart
1964 6b5a528c Florent Chuffart
1965 6b5a528c Florent Chuffart
1966 6b5a528c Florent Chuffart
1967 6b5a528c Florent Chuffart
1968 6b5a528c Florent Chuffart
1969 6b5a528c Florent Chuffart
1970 6b5a528c Florent Chuffart
1971 6b5a528c Florent Chuffart
1972 6b5a528c Florent Chuffart
1973 6b5a528c Florent Chuffart
1974 6b5a528c Florent Chuffart
1975 6b5a528c Florent Chuffart
1976 6b5a528c Florent Chuffart
1977 6b5a528c Florent Chuffart
perform_anovas = function(# Performaing ANOVAs
1978 6b5a528c Florent Chuffart
### Counts reads and Performs ANOVAS for each common nucleosomes involved.
1979 6b5a528c Florent Chuffart
replicates, ##<< Set of replicates, each replicate is a list of samples (ideally 3). Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}. In the \emph{perform_anovas} contexte, we need 4 replicates (4 * (3 samples)): 2 strains * (1 marker + 1 input (Mnase_Seq)). 
1980 6b5a528c Florent Chuffart
aligned_inter_strain_nucs, ##<< List of common nucleosomes.
1981 6b5a528c Florent Chuffart
inputs_name="Mnase_Seq", ##<< Name of the input.
1982 6b5a528c Florent Chuffart
plot_anova_boxes=FALSE ##<< Plot (or not) boxplot for each nuc.
1983 6b5a528c Florent Chuffart
) {
1984 6b5a528c Florent Chuffart
	anova_results = NULL
1985 6b5a528c Florent Chuffart
	for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
1986 6b5a528c Florent Chuffart
		inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
1987 6b5a528c Florent Chuffart
	  # counting reads
1988 6b5a528c Florent Chuffart
		my_data = NULL
1989 6b5a528c Florent Chuffart
		for (replicate_rank in 1:length(replicates)) {
1990 6b5a528c Florent Chuffart
			samples = replicates[[replicate_rank]]
1991 6b5a528c Florent Chuffart
			strain = samples[[1]]$strain
1992 6b5a528c Florent Chuffart
			marker = samples[[1]]$marker
1993 6b5a528c Florent Chuffart
			for (sample_rank in 1:length(samples)) {
1994 6b5a528c Florent Chuffart
				sample = samples[[sample_rank]]
1995 6b5a528c Florent Chuffart
				nuc_width = as.integer(inter_strain_nuc[[paste("upper_bound_",strain,sep="")]]) - inter_strain_nuc[[paste("lower_bound_",strain,sep="")]]
1996 6b5a528c Florent Chuffart
				nuc_reads = filter_tf_inputs(sample$inputs, inter_strain_nuc[[paste("chr_",strain,sep="")]], inter_strain_nuc[[paste("lower_bound_",strain,sep="")]], inter_strain_nuc[[paste("upper_bound_",strain,sep="")]], nuc_width - 1)
1997 6b5a528c Florent Chuffart
				indic = sum(nuc_reads[,4]) * 1000000/sample$total_reads / nuc_width 
1998 6b5a528c Florent Chuffart
				my_data = dfadd(my_data, list(strain = strain, marker = marker, indic = indic))
1999 6b5a528c Florent Chuffart
			}
2000 6b5a528c Florent Chuffart
		}
2001 6b5a528c Florent Chuffart
		my_data$strain = as.factor(my_data$strain)
2002 6b5a528c Florent Chuffart
		my_data$marker = as.factor(my_data$marker)
2003 6b5a528c Florent Chuffart
	
2004 6b5a528c Florent Chuffart
		strain_ref1 = replicates[[1]][[1]]$strain
2005 6b5a528c Florent Chuffart
		strain_ref2 = replicates[[2]][[1]]$strain 
2006 6b5a528c Florent Chuffart
		marker = replicates[[length(replicates)]][[1]]$marker				
2007 6b5a528c Florent Chuffart
2008 6b5a528c Florent Chuffart
	  # Collecting anova results
2009 6b5a528c Florent Chuffart
	  anova_result = list()
2010 6b5a528c Florent Chuffart
		# nucs info
2011 6b5a528c Florent Chuffart
		for (name in names(inter_strain_nuc)) {
2012 6b5a528c Florent Chuffart
			anova_result[[name]] = inter_strain_nuc[[name]]
2013 6b5a528c Florent Chuffart
		}
2014 6b5a528c Florent Chuffart
	  #
2015 6b5a528c Florent Chuffart
		for (strain in c(strain_ref1, strain_ref2)) {
2016 6b5a528c Florent Chuffart
			for (manip in c(inputs_name, marker)) {
2017 6b5a528c Florent Chuffart
				replicat = 1
2018 6b5a528c Florent Chuffart
				for(indic in my_data[my_data$strain == strain & my_data$marker == manip,]$indic) {
2019 6b5a528c Florent Chuffart
					anova_result[[paste(strain, manip, replicat, sep="_")]] = indic
2020 6b5a528c Florent Chuffart
					replicat = replicat + 1
2021 6b5a528c Florent Chuffart
				}			
2022 6b5a528c Florent Chuffart
			}	
2023 6b5a528c Florent Chuffart
		}
2024 6b5a528c Florent Chuffart
2025 6b5a528c Florent Chuffart
	  # Compute ANOVAs
2026 6b5a528c Florent Chuffart
		mnase_data = my_data[my_data$marker == inputs_name,]
2027 6b5a528c Florent Chuffart
		mnase_aov = aov(indic~strain,mnase_data)
2028 6b5a528c Florent Chuffart
		mnase_effects = model.tables(mnase_aov)$tables
2029 6b5a528c Florent Chuffart
		mnase_pvalues = summary(mnase_aov)[[1]]$Pr
2030 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,mnase_data)
2031 6b5a528c Florent Chuffart
		anova_result[["mnase_st"]] = mnase_effects$strain[1]
2032 6b5a528c Florent Chuffart
		anova_result[["mnase_st_pvalue"]] = mnase_pvalues[1]	
2033 6b5a528c Florent Chuffart
		# 
2034 6b5a528c Florent Chuffart
		marker_data = my_data[my_data$marker == marker,]
2035 6b5a528c Florent Chuffart
		marker_aov = aov(indic~strain,marker_data)
2036 6b5a528c Florent Chuffart
		marker_effects = model.tables(marker_aov)$tables
2037 6b5a528c Florent Chuffart
		marker_pvalues = summary(marker_aov)[[1]]$Pr
2038 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,marker_data)
2039 6b5a528c Florent Chuffart
		anova_result[["marker_st"]] = marker_effects$strain[1]
2040 6b5a528c Florent Chuffart
		anova_result[["marker_st_pvalue"]] = marker_pvalues[1]
2041 6b5a528c Florent Chuffart
		# 
2042 6b5a528c Florent Chuffart
		st1_data = my_data[my_data$strain == strain_ref1,]
2043 6b5a528c Florent Chuffart
		st1_aov = aov(indic~marker,st1_data)
2044 6b5a528c Florent Chuffart
		st1_effects = model.tables(st1_aov)$tables
2045 6b5a528c Florent Chuffart
		st1_pvalues = summary(st1_aov)[[1]]$Pr
2046 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,st1_data)
2047 6b5a528c Florent Chuffart
		anova_result[["st1_ma"]]=st1_effects$ma[inputs_name]
2048 6b5a528c Florent Chuffart
		anova_result[["st1_ma_pvalue"]]=st1_pvalues[1]
2049 6b5a528c Florent Chuffart
		# 
2050 6b5a528c Florent Chuffart
		st2_data = my_data[my_data$strain == strain_ref2,]
2051 6b5a528c Florent Chuffart
		st2_aov = aov(indic~marker,st2_data)
2052 6b5a528c Florent Chuffart
		st2_effects = model.tables(st2_aov)$tables
2053 6b5a528c Florent Chuffart
		st2_pvalues = summary(st2_aov)[[1]]$Pr
2054 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,st2_data)
2055 6b5a528c Florent Chuffart
		anova_result[["st2_ma"]]=st2_effects$ma[inputs_name]
2056 6b5a528c Florent Chuffart
		anova_result[["st2_ma_pvalue"]]=st2_pvalues[1]
2057 6b5a528c Florent Chuffart
		# 
2058 6b5a528c Florent Chuffart
		correl_data = my_data
2059 6b5a528c Florent Chuffart
		correl_aov = aov(indic~strain*marker,correl_data)
2060 6b5a528c Florent Chuffart
		correl_effects = model.tables(correl_aov)$tables
2061 6b5a528c Florent Chuffart
		correl_pvalues = summary(correl_aov)[[1]]$Pr
2062 6b5a528c Florent Chuffart
		if (plot_anova_boxes) {
2063 6b5a528c Florent Chuffart
			x11()
2064 6b5a528c Florent Chuffart
			boxplot(indic~marker*strain,correl_data)
2065 6b5a528c Florent Chuffart
		}
2066 6b5a528c Florent Chuffart
		anova_result[["correl_st"]]=correl_effects$strain[1]
2067 6b5a528c Florent Chuffart
		anova_result[["correl_ma"]]=correl_effects$ma[inputs_name]
2068 6b5a528c Florent Chuffart
		anova_result[["correl_st_ma"]]=correl_effects$"strain:marker"[1,1]
2069 6b5a528c Florent Chuffart
		anova_result[["correl_st_pvalue"]]=correl_pvalues[1]
2070 6b5a528c Florent Chuffart
		anova_result[["correl_ma_pvalue"]]=correl_pvalues[2]
2071 6b5a528c Florent Chuffart
		anova_result[["correl_st_ma_pvalue"]]=correl_pvalues[3]
2072 6b5a528c Florent Chuffart
		
2073 6b5a528c Florent Chuffart
		anova_results = dfadd(anova_results, anova_result)
2074 6b5a528c Florent Chuffart
	}
2075 6b5a528c Florent Chuffart
	return(anova_results)
2076 6b5a528c Florent Chuffart
### Returns ANOVA results and comunted reads.
2077 6b5a528c Florent Chuffart
}
2078 6b5a528c Florent Chuffart
2079 6b5a528c Florent Chuffart
2080 6b5a528c Florent Chuffart
watch_samples = function(# Watching analysis of samples 
2081 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
2082 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
2083 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
2084 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
2085 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
2086 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
2087 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
2088 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
2089 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
2090 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
2091 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
2092 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
2093 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
2094 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
2095 6b5a528c Florent Chuffart
plot_common_nucs = TRUE,  ##<< Plot (or not) aligned reads.
2096 6b5a528c Florent Chuffart
plot_anovas = FALSE,  ##<< Plot (or not) scatter for each nuc.
2097 6b5a528c Florent Chuffart
plot_anova_boxes =  FALSE,  ##<< Plot (or not) boxplot for each nuc.
2098 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
2099 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
2100 6b5a528c Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. 
2101 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
2102 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
2103 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
2104 6b5a528c Florent Chuffart
){
2105 6b5a528c Florent Chuffart
  returned_list = list()
2106 6b5a528c Florent Chuffart
  # Computing global display parameters
2107 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
2108 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
2109 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
2110 6b5a528c Florent Chuffart
  } else {
2111 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
2112 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
2113 6b5a528c Florent Chuffart
  }
2114 6b5a528c Florent Chuffart
	base_glo = 0
2115 6b5a528c Florent Chuffart
	nb_rank_glo = 0
2116 6b5a528c Florent Chuffart
  for (samples in replicates) {
2117 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
2118 6b5a528c Florent Chuffart
  }
2119 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
2120 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
2121 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
2122 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
2123 6b5a528c Florent Chuffart
  # Plot main frame
2124 935a568c Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab="#reads (per million reads)", xlab=paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr), main="NucleoMiner2" )
2125 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
2126 6b5a528c Florent Chuffart
  # Go
2127 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
2128 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
2129 6b5a528c Florent Chuffart
		# Computing replicate parameters
2130 6b5a528c Florent Chuffart
		nb_rank = length(samples)
2131 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
2132 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
2133 6b5a528c Florent Chuffart
		y_min = min(ylim)
2134 6b5a528c Florent Chuffart
		y_max = max(ylim)
2135 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
2136 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
2137 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
2138 6b5a528c Florent Chuffart
			# computing sample parameters
2139 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
2140 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
2141 6b5a528c Florent Chuffart
			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
2142 6b5a528c Florent Chuffart
2143 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2144 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
2145 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
2146 6b5a528c Florent Chuffart
		  } else {
2147 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
2148 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
2149 6b5a528c Florent Chuffart
		  }		
2150 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
2151 6b5a528c Florent Chuffart
	    # Plot Genome seq
2152 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
2153 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
2154 6b5a528c Florent Chuffart
		  }
2155 6b5a528c Florent Chuffart
			# Plot reads
2156 6b5a528c Florent Chuffart
			reads = sample$inputs
2157 6b5a528c Florent Chuffart
			signs = sign_from_strand(reads[,3])	
2158 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
2159 6b5a528c Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, 
2160 6b5a528c Florent Chuffart
				col=1, length=0.15/nb_rank)          	
2161 6b5a528c Florent Chuffart
			}
2162 6b5a528c Florent Chuffart
	    if (plot_squared_reads) {    	
2163 6b5a528c Florent Chuffart
        # require(plotrix)
2164 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
2165 6b5a528c Florent Chuffart
			}
2166 6b5a528c Florent Chuffart
	    if (plot_coverage) {    	
2167 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
2168 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
2169 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
2170 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
2171 a52f85d7 Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)          
2172 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
2173 a52f85d7 Florent Chuffart
          for (i in 1:length(step_h)) {          
2174 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
2175 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
2176 a52f85d7 Florent Chuffart
          }
2177 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
2178 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
2179 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
2180 a52f85d7 Florent Chuffart
          tmp_current_level = 0
2181 a52f85d7 Florent Chuffart
          for (i in 1:length(steps_y)) {          
2182 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
2183 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
2184 a52f85d7 Florent Chuffart
          }
2185 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
2186 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
2187 a52f85d7 Florent Chuffart
        } else {
2188 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
2189 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
2190 6b5a528c Florent Chuffart
        }
2191 0d1475ac Florent Chuffart
        # print(steps_x)
2192 0d1475ac Florent Chuffart
        # print(steps_y)
2193 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
2194 6b5a528c Florent Chuffart
        abline(y_lev,0)
2195 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
2196 6b5a528c Florent Chuffart
			}
2197 6b5a528c Florent Chuffart
			# Plot nucs
2198 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
2199 6b5a528c Florent Chuffart
				nucs = sample$outputs
2200 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
2201 6b5a528c Florent Chuffart
					col = 1
2202 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
2203 7646593d Florent Chuffart
            if (change_col) {
2204 7646593d Florent Chuffart
  						col = col + 1              
2205 7646593d Florent Chuffart
            }
2206 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
2207 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
2208 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
2209 6b5a528c Florent Chuffart
						total_involved_reads = sum(involved_reads[,4]) 
2210 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
2211 6b5a528c Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, 
2212 6b5a528c Florent Chuffart
							col=col, length=0.15/nb_rank)          	
2213 6b5a528c Florent Chuffart
						}
2214 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
2215 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
2216 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2217 6b5a528c Florent Chuffart
		  			}
2218 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
2219 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
2220 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2221 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
2222 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
2223 6b5a528c Florent Chuffart
	          }
2224 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
2225 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=2)
2226 6b5a528c Florent Chuffart
	          }
2227 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
2228 6b5a528c Florent Chuffart
				      # require(plotrix)
2229 883439d6 Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col) 
2230 6b5a528c Florent Chuffart
						}
2231 6b5a528c Florent Chuffart
		      }
2232 6b5a528c Florent Chuffart
		    } else {
2233 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
2234 6b5a528c Florent Chuffart
		    }
2235 6b5a528c Florent Chuffart
			}
2236 6b5a528c Florent Chuffart
	  }
2237 729c934e Florent Chuffart
    
2238 6b5a528c Florent Chuffart
	  # Plot wp nucs
2239 729c934e Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs | plot_chain)) {
2240 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
2241 7646593d Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {      
2242 6b5a528c Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]			
2243 6b5a528c Florent Chuffart
				} else {
2244 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2245 6b5a528c Florent Chuffart
				}
2246 6b5a528c Florent Chuffart
		  } else {
2247 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2248 6b5a528c Florent Chuffart
		  }
2249 729c934e Florent Chuffart
      if (plot_chain) {
2250 729c934e Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {      
2251 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2252 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
2253 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
2254 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
2255 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
2256 729c934e Florent Chuffart
            # print(tmp_nuc)
2257 729c934e Florent Chuffart
            return(tmp_nuc)
2258 dc58ee6c Florent Chuffart
          }) 
2259 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
2260 dc58ee6c Florent Chuffart
        })
2261 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2262 dc58ee6c Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2        
2263 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2264 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
2265 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
2266 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2267 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
2268 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
2269 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
2270 dc58ee6c Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2271 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
2272 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
2273 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2274 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2275 dc58ee6c Florent Chuffart
        tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
2276 dc58ee6c Florent Chuffart
        new_tmp_x = c()               
2277 dc58ee6c Florent Chuffart
        new_tmp_y = c()              
2278 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2279 dc58ee6c Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2 
2280 dc58ee6c Florent Chuffart
        new_tmp_x[index_odd] = tmp_x  
2281 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2282 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2283 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2284 dc58ee6c Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lw=2)
2285 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2286 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lw=2)
2287 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2288 7646593d Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, srt=90, cex=0.9, bg = "yellow")#, col=(tmp_lod_inter < 20) + 2)  
2289 729c934e Florent Chuffart
      }
2290 729c934e Florent Chuffart
  
2291 729c934e Florent Chuffart
      if (plot_wp_nucs | plot_common_nucs ) {
2292 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2293 729c934e Florent Chuffart
  			for (wp_nuc in wp_nucs) {
2294 729c934e Florent Chuffart
  				if (wp_nuc$wp){
2295 729c934e Florent Chuffart
  					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2296 729c934e Florent Chuffart
  					all_original_reads = c()
2297 729c934e Florent Chuffart
  					for(initial_nuc in wp_nuc$nucs) {
2298 729c934e Florent Chuffart
  						all_original_reads = c(all_original_reads, initial_nuc$original_reads)						
2299 729c934e Florent Chuffart
  					}
2300 729c934e Florent Chuffart
  					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2301 729c934e Florent Chuffart
  					if (FALSE) {
2302 729c934e Florent Chuffart
  					  rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
2303 729c934e Florent Chuffart
  				  }
2304 729c934e Florent Chuffart
  					if (plot_wp_nuc_model) {
2305 729c934e Florent Chuffart
  					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)				
2306 729c934e Florent Chuffart
  				  }
2307 729c934e Florent Chuffart
  				}
2308 729c934e Florent Chuffart
  			}
2309 729c934e Florent Chuffart
      }
2310 6b5a528c Florent Chuffart
		}
2311 6b5a528c Florent Chuffart
	}	
2312 6b5a528c Florent Chuffart
  
2313 6b5a528c Florent Chuffart
	if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
2314 6b5a528c Florent Chuffart
		if (is.null(aligned_inter_strain_nucs)) {
2315 1d833b97 Florent Chuffart
			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2316 6b5a528c Florent Chuffart
		}
2317 6b5a528c Florent Chuffart
		if (plot_common_nucs) {
2318 6b5a528c Florent Chuffart
      #Plot common wp nucs
2319 6b5a528c Florent Chuffart
      mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2320 6b5a528c Florent Chuffart
            for (replicate_rank in 1:length(replicates)) {
2321 6b5a528c Florent Chuffart
        nb_rank[[replicate_rank]] = length(samples)
2322 6b5a528c Florent Chuffart
        base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2323 6b5a528c Florent Chuffart
        ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2324 6b5a528c Florent Chuffart
        y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2325 6b5a528c Florent Chuffart
        y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2326 6b5a528c Florent Chuffart
        delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2327 6b5a528c Florent Chuffart
        mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2328 6b5a528c Florent Chuffart
      
2329 6b5a528c Florent Chuffart
        samples = replicates[[replicate_rank]]
2330 6b5a528c Florent Chuffart
        for (sample_rank in 1:length(samples)) {
2331 6b5a528c Florent Chuffart
          sample = samples[[sample_rank]]
2332 6b5a528c Florent Chuffart
          y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2333 6b5a528c Florent Chuffart
          if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2334 6b5a528c Florent Chuffart
            x_min[[replicate_rank]] = sample$roi[["begin"]]
2335 6b5a528c Florent Chuffart
            x_max[[replicate_rank]] = sample$roi[["end"]]
2336 6b5a528c Florent Chuffart
          } else {
2337 6b5a528c Florent Chuffart
            x_min[[replicate_rank]] = - sample$roi[["begin"]]
2338 6b5a528c Florent Chuffart
            x_max[[replicate_rank]] = - sample$roi[["end"]]
2339 6b5a528c Florent Chuffart
          }
2340 6b5a528c Florent Chuffart
          shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2341 6b5a528c Florent Chuffart
        }
2342 6b5a528c Florent Chuffart
      }  
2343 6b5a528c Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2344 6b5a528c Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2345 6b5a528c Florent Chuffart
        tmp_xs = tmp_ys = c()
2346 6b5a528c Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2347 6b5a528c Florent Chuffart
          samples = replicates[[replicate_rank]]
2348 6b5a528c Florent Chuffart
          strain = samples[[1]]$strain
2349 6b5a528c Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2350 6b5a528c Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2351 6b5a528c Florent Chuffart
        }
2352 6b5a528c Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)          
2353 6b5a528c Florent Chuffart
      }    
2354 6b5a528c Florent Chuffart
		}
2355 6b5a528c Florent Chuffart
		if (plot_anovas | plot_anova_boxes) {
2356 6b5a528c Florent Chuffart
			anova_results = perform_anovas(replicates, aligned_inter_strain_nucs, plot_anova_boxes=plot_anova_boxes)
2357 6b5a528c Florent Chuffart
			thres = FDR(anova_results[,"correl_st_ma_pvalue"],0.05)
2358 6b5a528c Florent Chuffart
			if (is.na(thres)) {
2359 6b5a528c Florent Chuffart
				# Boneferroni multiple test
2360 6b5a528c Florent Chuffart
				thres = 0.05/length(anova_results[,1])
2361 6b5a528c Florent Chuffart
			}
2362 6b5a528c Florent Chuffart
			filtred_anova_results = anova_results[anova_results[,"correl_st_ma_pvalue"]<thres,]
2363 6b5a528c Florent Chuffart
			x11()
2364 6b5a528c Florent Chuffart
			if (plot_anovas) {
2365 6b5a528c Florent Chuffart
			  plot(anova_results[,"mnase_st"],anova_results[,"correl_st_ma"], pch=1, xlim=c(-0.07,0.07), ylim=c(-0.07,0.07),main="SNEPs" ,xlab = "strain effect", ylab = "snep effect")
2366 6b5a528c Florent Chuffart
	      points(x=filtred_anova_results[,"mnase_st"],y=filtred_anova_results[,"correl_st_ma"],col=2, pch="+")
2367 6b5a528c Florent Chuffart
		  }
2368 6b5a528c Florent Chuffart
 	  }
2369 6b5a528c Florent Chuffart
	}
2370 6b5a528c Florent Chuffart
  return(returned_list)
2371 6b5a528c Florent Chuffart
}
2372 6b5a528c Florent Chuffart
2373 6b5a528c Florent Chuffart
2374 6b5a528c Florent Chuffart