Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ b20637ed

Historique | Voir | Annoter | Télécharger (88,15 ko)

1 6b5a528c Florent Chuffart
# get_content = function(# Get content from cached 
2 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
4 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
5 6b5a528c Florent Chuffart
# ) {
6 6b5a528c Florent Chuffart
#   UseMethod("get_content", obj)
7 6b5a528c Florent Chuffart
#   ### Returns the cached content of the current object. 
8 6b5a528c Florent Chuffart
# }
9 6b5a528c Florent Chuffart
# 
10 6b5a528c Florent Chuffart
# get_content.default = structure( function(# Get content from cached 
11 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
13 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
14 6b5a528c Florent Chuffart
# ) {
15 6b5a528c Florent Chuffart
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16 6b5a528c Florent Chuffart
#     NM_CACHE <<- list()
17 6b5a528c Florent Chuffart
#   }
18 6b5a528c Florent Chuffart
#   if (is.null(NM_CACHE[[obj$filename]])) {
19 6b5a528c Florent Chuffart
#     print(paste("Loading file ",obj$filename, sep=""))
20 6b5a528c Florent Chuffart
#     tmp_content = my_read(obj, ...)
21 6b5a528c Florent Chuffart
#     print("affect it...")
22 6b5a528c Florent Chuffart
#     NM_CACHE[[obj$filename]] <<- tmp_content
23 6b5a528c Florent Chuffart
#     print("done.")
24 6b5a528c Florent Chuffart
#   }  
25 6b5a528c Florent Chuffart
#   return(NM_CACHE[[obj$filename]])
26 6b5a528c Florent Chuffart
# ### Returns the cached content of the current object. 
27 6b5a528c Florent Chuffart
# }, ex=function(){
28 6b5a528c Florent Chuffart
#   # Create a dataframe
29 6b5a528c Florent Chuffart
#   df = NULL
30 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32 6b5a528c Florent Chuffart
#   # Dump it into tmp file
33 6b5a528c Florent Chuffart
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34 6b5a528c Florent Chuffart
#   # Load it into cache using feature.
35 6b5a528c Florent Chuffart
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36 6b5a528c Florent Chuffart
#   class(cached_content_object) = "table"  
37 6b5a528c Florent Chuffart
#   # First time it will be load into cache
38 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
39 6b5a528c Florent Chuffart
#   # Second time not
40 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
41 6b5a528c Florent Chuffart
#   
42 6b5a528c Florent Chuffart
# })
43 6b5a528c Florent Chuffart
#   
44 6b5a528c Florent Chuffart
# my_read = function(
45 6b5a528c Florent Chuffart
# ### Abstract my_read function.  
46 6b5a528c Florent Chuffart
#   obj, ...) {
47 6b5a528c Florent Chuffart
#   UseMethod("my_read", obj)
48 6b5a528c Florent Chuffart
# }
49 6b5a528c Florent Chuffart
#   
50 6b5a528c Florent Chuffart
# my_read.default = function(
51 6b5a528c Florent Chuffart
# ### Default my_read function.  
52 6b5a528c Florent Chuffart
#   obj, ...){
53 6b5a528c Florent Chuffart
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))      
54 6b5a528c Florent Chuffart
# }
55 6b5a528c Florent Chuffart
# 
56 6b5a528c Florent Chuffart
# my_read.fasta = function(
57 6b5a528c Florent Chuffart
# ### my_read function for fasta files.  
58 6b5a528c Florent Chuffart
#   obj, ...){
59 6b5a528c Florent Chuffart
#   # require(seqinr)
60 6b5a528c Florent Chuffart
#   return(read.fasta(obj$filename, ...))
61 6b5a528c Florent Chuffart
# }
62 6b5a528c Florent Chuffart
# 
63 6b5a528c Florent Chuffart
# my_read.table = function(
64 6b5a528c Florent Chuffart
# ### my_read function for table files.  
65 6b5a528c Florent Chuffart
#   obj, ...){
66 6b5a528c Florent Chuffart
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67 6b5a528c Florent Chuffart
#     return(read.table(file=gzfile(obj$filename), ...))      
68 6b5a528c Florent Chuffart
#   } else { 
69 6b5a528c Florent Chuffart
#     return(read.table(file=obj$filename, ...))
70 6b5a528c Florent Chuffart
#   }
71 6b5a528c Florent Chuffart
# }  
72 6b5a528c Florent Chuffart
# 
73 6b5a528c Florent Chuffart
# my_read.cvs = function(
74 6b5a528c Florent Chuffart
# ### my_read function for cvs files.  
75 6b5a528c Florent Chuffart
#   obj, ...){
76 6b5a528c Florent Chuffart
#   return(read.csv(file=obj$filename, ...))
77 6b5a528c Florent Chuffart
# }  
78 6b5a528c Florent Chuffart
79 6b5a528c Florent Chuffart
80 6b5a528c Florent Chuffart
81 6b5a528c Florent Chuffart
82 6b5a528c Florent Chuffart
83 6b5a528c Florent Chuffart
84 6b5a528c Florent Chuffart
85 6b5a528c Florent Chuffart
86 6b5a528c Florent Chuffart
87 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
88 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89 6b5a528c Florent Chuffart
##author<< Gael Yvert, 
90 6b5a528c Florent Chuffart
(
91 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
92 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
93 6b5a528c Florent Chuffart
) {
94 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
95 6b5a528c Florent Chuffart
  N = length(x)
96 6b5a528c Florent Chuffart
  i = 1;
97 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98 6b5a528c Florent Chuffart
  if (i == 1)
99 6b5a528c Florent Chuffart
    return (NA)
100 6b5a528c Florent Chuffart
  else
101 6b5a528c Florent Chuffart
    return( x[i-1] )
102 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
103 6b5a528c Florent Chuffart
}, ex=function(){
104 6b5a528c Florent Chuffart
  print("example")
105 6b5a528c Florent Chuffart
})
106 6b5a528c Florent Chuffart
107 6b5a528c Florent Chuffart
108 6b5a528c Florent Chuffart
109 6b5a528c Florent Chuffart
lod_score_vecs = structure(function # Likelihood ratio
110 6b5a528c Florent Chuffart
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111 6b5a528c Florent Chuffart
(
112 6b5a528c Florent Chuffart
x ,##<< First vector.
113 6b5a528c Florent Chuffart
y ##<< Second vector.
114 6b5a528c Florent Chuffart
) {
115 6b5a528c Florent Chuffart
	if (length(x) <=1 | length(y) <= 1) {
116 6b5a528c Florent Chuffart
		return(NA)
117 6b5a528c Florent Chuffart
	}
118 6b5a528c Florent Chuffart
  meanX = mean(x)
119 6b5a528c Florent Chuffart
  sdX = sd(x)
120 6b5a528c Florent Chuffart
  meanY = mean(y)
121 6b5a528c Florent Chuffart
  sdY = sd(y)
122 6b5a528c Florent Chuffart
  meanXY = mean(c(x,y))
123 6b5a528c Florent Chuffart
  sdXY = sd(c(x,y))
124 6b5a528c Florent Chuffart
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125 6b5a528c Florent Chuffart
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126 6b5a528c Florent Chuffart
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127 6b5a528c Florent Chuffart
  ratio = -(llX + llY - llXY)
128 6b5a528c Florent Chuffart
  return(ratio)
129 6b5a528c Florent Chuffart
### Returns the likelihood ratio.
130 6b5a528c Florent Chuffart
}, ex=function(){
131 6b5a528c Florent Chuffart
  # LOD score for 2 set of values
132 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
133 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
134 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
135 6b5a528c Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)  
136 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
137 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
138 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142 6b5a528c Florent Chuffart
  lod_score_vecs(x1,x2)
143 6b5a528c Florent Chuffart
 })
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
146 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
148 6b5a528c Florent Chuffart
		l ##<<  A list
149 6b5a528c Florent Chuffart
	) {
150 6b5a528c Florent Chuffart
  if (is.null(df)) {
151 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
152 6b5a528c Florent Chuffart
  } else {
153 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154 6b5a528c Florent Chuffart
  }
155 6b5a528c Florent Chuffart
  return(df)
156 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
157 6b5a528c Florent Chuffart
}, ex=function(){
158 6b5a528c Florent Chuffart
		## Here dataframe is NULL
159 6b5a528c Florent Chuffart
		print(df)
160 6b5a528c Florent Chuffart
		df = NULL
161 6b5a528c Florent Chuffart
		
162 6b5a528c Florent Chuffart
		# Initialize df
163 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164 6b5a528c Florent Chuffart
		print(df)
165 6b5a528c Florent Chuffart
		
166 6b5a528c Florent Chuffart
		# Adding elements to df
167 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168 6b5a528c Florent Chuffart
		print(df)
169 6b5a528c Florent Chuffart
})
170 6b5a528c Florent Chuffart
171 6b5a528c Florent Chuffart
172 6b5a528c Florent Chuffart
sign_from_strand = function(
173 6b5a528c Florent Chuffart
### Get the sign of strand 
174 6b5a528c Florent Chuffart
strands) {
175 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
177 6b5a528c Florent Chuffart
}
178 6b5a528c Florent Chuffart
179 6b5a528c Florent Chuffart
flat_reads = function(
180 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
181 6b5a528c Florent Chuffart
reads, ##<< TemplateFilter input reads 
182 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
183 6b5a528c Florent Chuffart
) {
184 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189 6b5a528c Florent Chuffart
}
190 6b5a528c Florent Chuffart
191 6b5a528c Florent Chuffart
192 6b5a528c Florent Chuffart
193 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
194 6b5a528c Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
196 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
197 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
198 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
199 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
200 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
201 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
202 6b5a528c Florent Chuffart
) {
203 6b5a528c Florent Chuffart
  if (x_min < 0) {
204 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (-x_max - nuc_width/2) & tf_outputs$center <  (-x_min + nuc_width/2),]
205 6b5a528c Florent Chuffart
	} else {
206 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (x_min - nuc_width/2) & tf_outputs$center < (x_max + nuc_width/2),]
207 6b5a528c Florent Chuffart
  }
208 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210 6b5a528c Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]  
211 6b5a528c Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]  
212 6b5a528c Florent Chuffart
  i = 1
213 6b5a528c Florent Chuffart
  while (i <= length(tf_outputs[,1])) {    
214 6b5a528c Florent Chuffart
    lb = tf_outputs[i,]$low 
215 6b5a528c Florent Chuffart
    ub = tf_outputs[i,]$up
216 6b5a528c Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217 6b5a528c Florent Chuffart
    i = i+1
218 6b5a528c Florent Chuffart
  }
219 6b5a528c Florent Chuffart
  return(tf_outputs)
220 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
221 6b5a528c Florent Chuffart
}
222 6b5a528c Florent Chuffart
223 6b5a528c Florent Chuffart
224 6b5a528c Florent Chuffart
225 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
226 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
228 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
229 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
230 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
231 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
232 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
233 6b5a528c Florent Chuffart
only_r = FALSE ##<< Filter only R reads.
234 6b5a528c Florent Chuffart
) {
235 6b5a528c Florent Chuffart
	if (only_f) {
236 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
237 6b5a528c Florent Chuffart
	} else if (only_r) {
238 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
239 6b5a528c Florent Chuffart
	} else {
240 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
241 6b5a528c Florent Chuffart
	}
242 6b5a528c Florent Chuffart
	corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
243 6b5a528c Florent Chuffart
  inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
244 6b5a528c Florent Chuffart
	return(inputs)
245 6b5a528c Florent Chuffart
### Returns filtred inputs.
246 6b5a528c Florent Chuffart
}
247 6b5a528c Florent Chuffart
248 6b5a528c Florent Chuffart
249 6b5a528c Florent Chuffart
250 6b5a528c Florent Chuffart
get_comp_strand = function(
251 6b5a528c Florent Chuffart
### Compute the complementatry strand.
252 6b5a528c Florent Chuffart
strand ##<< The original strand.
253 6b5a528c Florent Chuffart
) {
254 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
255 6b5a528c Florent Chuffart
	  if (n=="a") {return("t")} 
256 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
257 6b5a528c Florent Chuffart
		if (n=="c") {return("g")} 
258 6b5a528c Florent Chuffart
		if (n=="g") {return("c")} 
259 6b5a528c Florent Chuffart
	})
260 6b5a528c Florent Chuffart
### Returns the complementatry strand.
261 6b5a528c Florent Chuffart
}
262 6b5a528c Florent Chuffart
263 6b5a528c Florent Chuffart
264 6b5a528c Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. 
265 6b5a528c Florent Chuffart
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
266 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
267 6b5a528c Florent Chuffart
lod_thres=-20, ##<< Log likelihood ration threshold.
268 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
269 6b5a528c Florent Chuffart
){
270 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
271 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
272 6b5a528c Florent Chuffart
			return(TRUE)
273 6b5a528c Florent Chuffart
		}
274 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
275 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
276 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
277 6b5a528c Florent Chuffart
	        return(FALSE)
278 6b5a528c Florent Chuffart
	      }
279 6b5a528c Florent Chuffart
	  	}
280 6b5a528c Florent Chuffart
	  }
281 6b5a528c Florent Chuffart
	  return(TRUE)
282 6b5a528c Florent Chuffart
	}
283 6b5a528c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
284 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
285 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
286 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
287 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
288 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
289 6b5a528c Florent Chuffart
				# print(new_cluster)
290 6b5a528c Florent Chuffart
		  }
291 6b5a528c Florent Chuffart
		} else {
292 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
293 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
294 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
295 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
296 6b5a528c Florent Chuffart
			}
297 6b5a528c Florent Chuffart
		}
298 6b5a528c Florent Chuffart
		return(clusters)
299 6b5a528c Florent Chuffart
	}
300 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
301 6b5a528c Florent Chuffart
	lod_scores = c()
302 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
303 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
304 6b5a528c Florent Chuffart
  # compute clusters
305 6b5a528c Florent Chuffart
  clusters = list()
306 6b5a528c Florent Chuffart
  cluster_contents = list()
307 6b5a528c Florent Chuffart
  # Init reader
308 6b5a528c Florent Chuffart
  indexes = c()
309 6b5a528c Florent Chuffart
  track_readers = c()
310 6b5a528c Florent Chuffart
  current_nuc = NULL
311 6b5a528c Florent Chuffart
	lod_score = lod_thres - 1 
312 6b5a528c Florent Chuffart
  # Read nucs from TF outputs  
313 6b5a528c Florent Chuffart
  tf_outs = list()
314 6b5a528c Florent Chuffart
	i = 1
315 6b5a528c Florent Chuffart
  for (sample in samples) {
316 6b5a528c Florent Chuffart
		# print(sample$roi$chr)
317 6b5a528c Florent Chuffart
		# print(min_nuc_center)
318 6b5a528c Florent Chuffart
		# print(max_nuc_center)
319 6b5a528c Florent Chuffart
		# print(sample$outputs)
320 6b5a528c Florent Chuffart
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
321 6b5a528c Florent Chuffart
		# print(tf_outs[[i]])
322 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
323 6b5a528c Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]  
324 6b5a528c Florent Chuffart
    indexes[i] = 1
325 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
326 6b5a528c Florent Chuffart
      track_readers[i] = coord_max				
327 6b5a528c Florent Chuffart
	  } else {
328 6b5a528c Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center				
329 6b5a528c Florent Chuffart
		}
330 6b5a528c Florent Chuffart
		i = i+1		
331 6b5a528c Florent Chuffart
  }
332 6b5a528c Florent Chuffart
	# print(track_readers)
333 6b5a528c Florent Chuffart
  new_cluster = NULL
334 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
335 6b5a528c Florent Chuffart
  nuc_from_track = c()
336 6b5a528c Florent Chuffart
  for (i in 1:length(tf_outs)){
337 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
338 6b5a528c Florent Chuffart
  }
339 6b5a528c Florent Chuffart
  # Start clustering
340 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
341 6b5a528c Florent Chuffart
    new_center = min(track_readers)
342 6b5a528c Florent Chuffart
		current_track = which(track_readers == new_center)[1]			
343 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
344 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
345 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
346 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
347 6b5a528c Florent Chuffart
		new_nuc$track = current_track
348 6b5a528c Florent Chuffart
		
349 6b5a528c Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width) 
350 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
351 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
352 6b5a528c Florent Chuffart
353 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
354 6b5a528c Florent Chuffart
355 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
356 6b5a528c Florent Chuffart
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
357 6b5a528c Florent Chuffart
			lod_scores = c(lod_scores,lod_score)
358 6b5a528c Florent Chuffart
		}
359 6b5a528c Florent Chuffart
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
360 6b5a528c Florent Chuffart
		if (is.na(lod_score)) {
361 6b5a528c Florent Chuffart
			lod_score = lod_thres - 1 
362 6b5a528c Florent Chuffart
		}
363 6b5a528c Florent Chuffart
	  if (lod_score > lod_thres) {
364 6b5a528c Florent Chuffart
			# Store lod_score
365 6b5a528c Florent Chuffart
			new_nuc$lod_score = lod_score 			
366 6b5a528c Florent Chuffart
      # aggregate to current cluster
367 6b5a528c Florent Chuffart
      #   update bound
368 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
369 6b5a528c Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound      
370 6b5a528c Florent Chuffart
      }
371 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
372 6b5a528c Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound         
373 6b5a528c Florent Chuffart
      }
374 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
375 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
376 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
377 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
378 6b5a528c Florent Chuffart
    } else {
379 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
380 6b5a528c Florent Chuffart
        # store old cluster
381 6b5a528c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
382 6b5a528c Florent Chuffart
			}
383 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
384 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
385 6b5a528c Florent Chuffart
      nuc_from_track = c()
386 6b5a528c Florent Chuffart
      for (i in 1:length(tf_outs)){
387 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
388 6b5a528c Florent Chuffart
      }
389 6b5a528c Florent Chuffart
      # create new cluster        
390 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
391 6b5a528c Florent Chuffart
      # update upper bound
392 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
393 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
394 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
395 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
396 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
397 6b5a528c Florent Chuffart
				
398 6b5a528c Florent Chuffart
		}
399 6b5a528c Florent Chuffart
			
400 6b5a528c Florent Chuffart
		current_nuc = new_nuc
401 6b5a528c Florent Chuffart
402 6b5a528c Florent Chuffart
    # update indexes
403 6b5a528c Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {      
404 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
405 6b5a528c Florent Chuffart
      # update track
406 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
407 6b5a528c Florent Chuffart
    } else {
408 6b5a528c Florent Chuffart
      # update track
409 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
410 6b5a528c Florent Chuffart
    }
411 6b5a528c Florent Chuffart
  }
412 6b5a528c Florent Chuffart
  # store last cluster
413 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
414 6b5a528c Florent Chuffart
    # store old cluster
415 6b5a528c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
416 6b5a528c Florent Chuffart
  }
417 6b5a528c Florent Chuffart
	return(list(clusters, lod_scores))
418 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
419 6b5a528c Florent Chuffart
}, ex=function(){	
420 6b5a528c Florent Chuffart
	# Dealing with a region of interest
421 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
422 6b5a528c Florent Chuffart
	samples = list()
423 6b5a528c Florent Chuffart
	for (i in 1:3) {
424 6b5a528c Florent Chuffart
		# Create TF output
425 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
426 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
427 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
428 6b5a528c Florent Chuffart
		# Generate corresponding reads
429 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
430 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
431 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
432 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
433 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
434 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
435 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
436 6b5a528c Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
437 6b5a528c Florent Chuffart
		                         "V2" = u_reads, 
438 6b5a528c Florent Chuffart
														 "V3" = strands, 
439 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
440 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
441 6b5a528c Florent Chuffart
	}
442 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
443 6b5a528c Florent Chuffart
})
444 6b5a528c Florent Chuffart
445 6b5a528c Florent Chuffart
446 6b5a528c Florent Chuffart
447 6b5a528c Florent Chuffart
448 6b5a528c Florent Chuffart
449 6b5a528c Florent Chuffart
450 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
451 6b5a528c Florent Chuffart
### This function aligns nucs between two strains for a given genome region.
452 6b5a528c Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain. 
453 6b5a528c Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
454 6b5a528c Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
455 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
456 6b5a528c Florent Chuffart
lod_thres=-100, ##<< LOD cut off.
457 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
458 6b5a528c Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.	
459 6b5a528c Florent Chuffart
) {
460 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
461 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
462 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
463 6b5a528c Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")			
464 6b5a528c Florent Chuffart
	}
465 6b5a528c Florent Chuffart
	common_nuc = NULL
466 6b5a528c Florent Chuffart
	lod_scores = c()
467 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
468 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
469 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
470 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
471 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
472 6b5a528c Florent Chuffart
	big_roi = replicates[[1]][[1]]$roi
473 a0b91fee Florent Chuffart
  orig_big_roi = replicates[[1]][[1]]$orig_roi
474 6b5a528c Florent Chuffart
	if(big_roi$end - big_roi$begin < 0) {
475 6b5a528c Florent Chuffart
		tmp_begin = big_roi$begin
476 6b5a528c Florent Chuffart
		big_roi$begin =  big_roi$end
477 6b5a528c Florent Chuffart
		big_roi$end =  tmp_begin
478 6b5a528c Florent Chuffart
	}
479 6b5a528c Florent Chuffart
	# GO!
480 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
481 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
482 6b5a528c Florent Chuffart
	}
483 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
484 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
485 6b5a528c Florent Chuffart
  }
486 1d833b97 Florent Chuffart
  # foo <<- wp_nucs_strain_ref1
487 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
488 1d833b97 Florent Chuffart
  # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
489 6b5a528c Florent Chuffart
	# dealing with matching_nas
490 6b5a528c Florent Chuffart
	lws = c()
491 6b5a528c Florent Chuffart
	ups = c()
492 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
493 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
494 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
495 6b5a528c Florent Chuffart
	}
496 6b5a528c Florent Chuffart
497 6b5a528c Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))			
498 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
499 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
500 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
501 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
502 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
503 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
504 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
505 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
506 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
507 a0b91fee Florent Chuffart
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi=orig_big_roi, config=config) 
508 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
509 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
510 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
511 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
512 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
513 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
514 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
515 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
516 883439d6 Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)             
517 a0b91fee Florent Chuffart
						if (!is.null(translate_roi(nuc_strain_ref2_to_roi, strain_ref1, big_roi=orig_big_roi, config=config)) &  
518 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
519 6b5a528c Florent Chuffart
							# Filtering on correlation Score and collecting reads					
520 6b5a528c Florent Chuffart
							SKIP = FALSE
521 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
522 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
523 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
524 6b5a528c Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)							
525 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
526 6b5a528c Florent Chuffart
									SKIP = TRUE
527 6b5a528c Florent Chuffart
								}
528 6b5a528c Florent Chuffart
							}
529 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
530 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
531 6b5a528c Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)							
532 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
533 6b5a528c Florent Chuffart
									SKIP = TRUE
534 6b5a528c Florent Chuffart
								}
535 6b5a528c Florent Chuffart
							}
536 6b5a528c Florent Chuffart
							# Filtering on correlation Score						
537 6b5a528c Florent Chuffart
							if (!SKIP) {
538 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
539 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
540 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
541 6b5a528c Florent Chuffart
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
542 6b5a528c Florent Chuffart
								lod_scores = c(lod_scores, lod_score)
543 6b5a528c Florent Chuffart
								# Filtering on LOD Score						
544 6b5a528c Florent Chuffart
								if (lod_score > lod_thres) {
545 6b5a528c Florent Chuffart
									tmp_nuc = list()
546 6b5a528c Florent Chuffart
									# strain_ref1
547 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
548 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
549 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
550 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
551 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
552 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
553 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
554 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
555 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
556 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
557 6b5a528c Florent Chuffart
									# strain_ref2
558 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
559 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
560 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
561 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
562 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
563 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
564 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
565 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
566 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
567 6b5a528c Florent Chuffart
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
568 6b5a528c Florent Chuffart
									# common
569 6b5a528c Florent Chuffart
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
570 6b5a528c Florent Chuffart
									# print(tmp_nuc)
571 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
572 6b5a528c Florent Chuffart
								}
573 6b5a528c Florent Chuffart
							}
574 6b5a528c Florent Chuffart
						}
575 6b5a528c Florent Chuffart
					}				
576 6b5a528c Florent Chuffart
				} else {
577 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
578 6b5a528c Florent Chuffart
			  }
579 6b5a528c Florent Chuffart
		  }
580 6b5a528c Florent Chuffart
		}
581 6b5a528c Florent Chuffart
		
582 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
583 6b5a528c Florent Chuffart
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
584 6b5a528c Florent Chuffart
			to_remove_list = c()			
585 6b5a528c Florent Chuffart
			for (i in 1:length(index_redundant)) {	
586 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
587 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
588 6b5a528c Florent Chuffart
				}	 else {
589 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
590 6b5a528c Florent Chuffart
			  } 		
591 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
592 6b5a528c Florent Chuffart
			}
593 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
594 6b5a528c Florent Chuffart
		}
595 6b5a528c Florent Chuffart
		
596 6b5a528c Florent Chuffart
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
597 8c87ee64 Florent Chuffart
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
598 6b5a528c Florent Chuffart
			to_remove_list = c()			
599 6b5a528c Florent Chuffart
			for (i in 1:length(index_redundant)) {	
600 6b5a528c Florent Chuffart
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
601 6b5a528c Florent Chuffart
				  to_remove = index_redundant[i]
602 6b5a528c Florent Chuffart
				}	 else {
603 6b5a528c Florent Chuffart
					to_remove = index_redundant[i] + 1
604 6b5a528c Florent Chuffart
			  } 		
605 6b5a528c Florent Chuffart
				to_remove_list = c(to_remove_list, to_remove)
606 6b5a528c Florent Chuffart
			}
607 6b5a528c Florent Chuffart
			common_nuc = common_nuc[-to_remove_list,]
608 6b5a528c Florent Chuffart
		}
609 6b5a528c Florent Chuffart
				
610 6b5a528c Florent Chuffart
		return(list(common_nuc, lod_scores))
611 6b5a528c Florent Chuffart
	} else {
612 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
613 6b5a528c Florent Chuffart
		return(NULL)
614 6b5a528c Florent Chuffart
	}
615 6b5a528c Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed lod scores.
616 6b5a528c Florent Chuffart
}, ex=function(){	
617 1d833b97 Florent Chuffart
618 1d833b97 Florent Chuffart
    # Define new translate_roi function...
619 1d833b97 Florent Chuffart
    translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
620 1d833b97 Florent Chuffart
      return(roi)
621 1d833b97 Florent Chuffart
    }
622 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
623 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", as.environment("package:nucleominer"))
624 1d833b97 Florent Chuffart
    unlockBinding("translate_roi", getNamespace("nucleominer"))
625 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, "package:nucleominer")
626 1d833b97 Florent Chuffart
    assign("translate_roi", translate_roi, getNamespace("nucleominer"))
627 1d833b97 Florent Chuffart
    lockBinding("translate_roi", getNamespace("nucleominer"))
628 1d833b97 Florent Chuffart
    lockBinding("translate_roi", as.environment("package:nucleominer"))  
629 1d833b97 Florent Chuffart
630 6b5a528c Florent Chuffart
	# Dealing with a region of interest
631 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
632 6b5a528c Florent Chuffart
	roi2 = translate_roi(roi, roi$strain_ref1)
633 6b5a528c Florent Chuffart
	replicates = list()
634 6b5a528c Florent Chuffart
	for (j in 1:2) {
635 6b5a528c Florent Chuffart
		samples = list()
636 6b5a528c Florent Chuffart
		for (i in 1:3) {
637 6b5a528c Florent Chuffart
			# Create TF output
638 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
639 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
640 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
641 6b5a528c Florent Chuffart
			# Generate corresponding reads
642 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
643 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
644 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
645 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
646 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
647 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
648 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
649 6b5a528c Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
650 6b5a528c Florent Chuffart
			                         "V2" = u_reads, 
651 6b5a528c Florent Chuffart
															 "V3" = strands, 
652 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
653 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
654 6b5a528c Florent Chuffart
		}
655 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
656 6b5a528c Florent Chuffart
	}
657 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
658 6b5a528c Florent Chuffart
})
659 6b5a528c Florent Chuffart
660 6b5a528c Florent Chuffart
661 6b5a528c Florent Chuffart
662 6b5a528c Florent Chuffart
663 6b5a528c Florent Chuffart
664 6b5a528c Florent Chuffart
665 6b5a528c Florent Chuffart
666 6b5a528c Florent Chuffart
667 6b5a528c Florent Chuffart
668 6b5a528c Florent Chuffart
669 6b5a528c Florent Chuffart
670 6b5a528c Florent Chuffart
671 6b5a528c Florent Chuffart
672 6b5a528c Florent Chuffart
673 6b5a528c Florent Chuffart
674 6b5a528c Florent Chuffart
675 6b5a528c Florent Chuffart
676 6b5a528c Florent Chuffart
677 6b5a528c Florent Chuffart
678 6b5a528c Florent Chuffart
679 6b5a528c Florent Chuffart
680 6b5a528c Florent Chuffart
681 6b5a528c Florent Chuffart
682 6b5a528c Florent Chuffart
683 6b5a528c Florent Chuffart
684 6b5a528c Florent Chuffart
685 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
686 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
687 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
688 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
689 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
690 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
691 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
692 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
693 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
694 6b5a528c Florent Chuffart
) {
695 6b5a528c Florent Chuffart
	samples=list()
696 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
697 6b5a528c Florent Chuffart
	for (i in samples_ids) {
698 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
699 a0b91fee Florent Chuffart
    sample$orig_roi = roi
700 8e9facd8 Florent Chuffart
    sample$roi = translate_roi(roi, sample$strain, config = config)
701 6b5a528c Florent Chuffart
		if (get_genome) {
702 6b5a528c Florent Chuffart
			# Get Genome
703 1d833b97 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
704 6b5a528c Florent Chuffart
		}
705 6b5a528c Florent Chuffart
		# Get inputs
706 a52f85d7 Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) 
707 6b5a528c Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4]) 	
708 6b5a528c Florent Chuffart
		if (!only_fetch) {
709 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
710 6b5a528c Florent Chuffart
	  }
711 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
712 6b5a528c Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
713 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
714 6b5a528c Florent Chuffart
			if (!only_fetch) {
715 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
716 6b5a528c Florent Chuffart
  		}
717 6b5a528c Florent Chuffart
		}
718 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = sample		
719 6b5a528c Florent Chuffart
	}
720 6b5a528c Florent Chuffart
  return(samples)
721 6b5a528c Florent Chuffart
}
722 6b5a528c Florent Chuffart
723 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
724 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
725 6b5a528c Florent Chuffart
region1, ##<< Original regions. 
726 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
727 6b5a528c Florent Chuffart
) {
728 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
729 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
730 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
731 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
732 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
733 6b5a528c Florent Chuffart
      ret = c()
734 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
735 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
736 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
737 6b5a528c Florent Chuffart
          # remove cur_fuzzy
738 6b5a528c Florent Chuffart
          ret = c()
739 6b5a528c Florent Chuffart
          break
740 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
741 6b5a528c Florent Chuffart
          # crop fuzzy 
742 6b5a528c Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1 
743 6b5a528c Florent Chuffart
          ret = cur_fuzzy
744 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
745 6b5a528c Florent Chuffart
          # crop fuzzy 
746 6b5a528c Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1 
747 6b5a528c Florent Chuffart
          ret = cur_fuzzy
748 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
749 6b5a528c Florent Chuffart
          # split fuzzy
750 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
751 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1 
752 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
753 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1 
754 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
755 6b5a528c Florent Chuffart
          # print(ret)
756 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
757 6b5a528c Florent Chuffart
          break
758 6b5a528c Florent Chuffart
        } else {
759 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
760 6b5a528c Florent Chuffart
        }          
761 6b5a528c Florent Chuffart
      }
762 6b5a528c Florent Chuffart
      return(ret)
763 6b5a528c Florent Chuffart
    } else {
764 6b5a528c Florent Chuffart
      return(cur_fuzzy)
765 6b5a528c Florent Chuffart
    }
766 6b5a528c Florent Chuffart
  })
767 6b5a528c Florent Chuffart
  }
768 6b5a528c Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1, region2)
769 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
770 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
771 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
772 b20637ed Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE) 
773 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
774 b20637ed Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)     
775 b20637ed Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)     
776 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
777 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
778 b20637ed Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]      
779 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
780 6b5a528c Florent Chuffart
}
781 6b5a528c Florent Chuffart
782 6b5a528c Florent Chuffart
union_regions = function(# Aggregate regions that intersect themnselves.
783 6b5a528c Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.  
784 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
785 6b5a528c Florent Chuffart
) {
786 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
787 6b5a528c Florent Chuffart
  new_length = 0
788 6b5a528c Florent Chuffart
    
789 6b5a528c Florent Chuffart
  while (old_length != new_length) {
790 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
791 6b5a528c Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 0, TRUE)      
792 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
793 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
794 6b5a528c Florent Chuffart
      vec_end_1 = c(length(regions$stop))      
795 6b5a528c Florent Chuffart
    } 
796 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
797 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
798 6b5a528c Florent Chuffart
    }
799 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
800 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
801 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
802 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
803 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
804 6b5a528c Florent Chuffart
      roi_index = regions$roi_index[vec_beg_1[i]] 
805 6b5a528c Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, roi_index=roi_index))
806 6b5a528c Florent Chuffart
      })
807 59ad95ca Florent Chuffart
    union = collapse_regions(union)
808 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
809 6b5a528c Florent Chuffart
    new_length = length(union[,1])
810 6b5a528c Florent Chuffart
    regions = union
811 6b5a528c Florent Chuffart
  }
812 6b5a528c Florent Chuffart
  return(union)
813 6b5a528c Florent Chuffart
}
814 6b5a528c Florent Chuffart
815 6b5a528c Florent Chuffart
remove_aligned_wp = function(# Remove wp nucs from common nucs list.
816 6b5a528c Florent Chuffart
### It is based on common wp nucs index on nucs and region.  
817 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
818 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
819 6b5a528c Florent Chuffart
tmp_common_nucs, ##<< the list of wp nucs.
820 6b5a528c Florent Chuffart
strain##<< The strain to consider.
821 6b5a528c Florent Chuffart
){
822 6b5a528c Florent Chuffart
  fuzzy_nucs = strain_maps[[strain]]
823 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$roi_index == roi_index,]
824 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
825 6b5a528c Florent Chuffart
  if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
826 6b5a528c Florent Chuffart
  if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
827 6b5a528c Florent Chuffart
  anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
828 6b5a528c Florent Chuffart
  fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
829 6b5a528c Florent Chuffart
  return(fuzzy_nucs)
830 6b5a528c Florent Chuffart
}
831 6b5a528c Florent Chuffart
832 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
833 6b5a528c Florent Chuffart
### This function is an eloborated call to translate_roi.
834 6b5a528c Florent Chuffart
regions, ##<< Regions to be translated. 
835 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
836 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
837 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
838 6b5a528c Florent Chuffart
roi ##<< The region of interest.
839 6b5a528c Florent Chuffart
) {
840 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
841 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
842 5a26597a Florent Chuffart
    big_roi =  roi
843 5a26597a Florent Chuffart
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config = config, big_roi = big_roi) 
844 6b5a528c Florent Chuffart
    data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
845 6b5a528c Florent Chuffart
    })
846 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
847 59ad95ca Florent Chuffart
}
848 59ad95ca Florent Chuffart
849 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
850 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
851 59ad95ca Florent Chuffart
regions ##< a list of regions
852 59ad95ca Florent Chuffart
) {
853 b20637ed Florent Chuffart
  regions = do.call(rbind, regions)      
854 59ad95ca Florent Chuffart
  regions$chr = as.character(regions$chr)     
855 59ad95ca Florent Chuffart
  regions$chr = as.numeric(regions$chr)     
856 59ad95ca Florent Chuffart
  regions$lower_bound = as.numeric(regions$lower_bound)
857 59ad95ca Florent Chuffart
  regions$upper_bound = as.numeric(regions$upper_bound)
858 59ad95ca Florent Chuffart
  regions = regions[order(regions$lower_bound),]      
859 59ad95ca Florent Chuffart
  return(regions)  
860 6b5a528c Florent Chuffart
}
861 6b5a528c Florent Chuffart
862 6b5a528c Florent Chuffart
extract_wp = function(# Extract wp nucs from nuc map. 
863 6b5a528c Florent Chuffart
### Function based on common wp nuc index and roi_index.
864 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
865 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
866 6b5a528c Florent Chuffart
strain, ##<< The strain to consider.
867 6b5a528c Florent Chuffart
tmp_common_nucs ##<< the list of wp nucs.
868 6b5a528c Florent Chuffart
) {
869 6b5a528c Florent Chuffart
  wp_nucs = apply(t(tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]), 2, function(i) {
870 6b5a528c Florent Chuffart
    tmp_wp_nucs = strain_maps[[strain]]
871 6b5a528c Florent Chuffart
    tmp_wp_nucs = tmp_wp_nucs[tmp_wp_nucs$roi_index == roi_index & tmp_wp_nucs$index_nuc == i,]
872 6b5a528c Florent Chuffart
    return(tmp_wp_nucs)
873 6b5a528c Florent Chuffart
    })
874 59ad95ca Florent Chuffart
  return(collapse_regions(wp_nucs))
875 6b5a528c Florent Chuffart
}
876 6b5a528c Florent Chuffart
877 6b5a528c Florent Chuffart
878 6b5a528c Florent Chuffart
879 6b5a528c Florent Chuffart
880 6b5a528c Florent Chuffart
881 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
882 6b5a528c Florent Chuffart
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
883 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
884 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
885 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
886 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
887 6b5a528c Florent Chuffart
) {
888 8e9facd8 Florent Chuffart
  tr_roi = translate_roi(roi, strain, config = config)
889 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
890 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
891 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
892 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
893 6b5a528c Florent Chuffart
  }
894 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
895 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
896 6b5a528c Florent Chuffart
  }
897 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
898 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
899 6b5a528c Florent Chuffart
  }
900 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
901 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
902 6b5a528c Florent Chuffart
  }
903 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
904 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
905 6b5a528c Florent Chuffart
}
906 6b5a528c Florent Chuffart
907 6b5a528c Florent Chuffart
908 6b5a528c Florent Chuffart
get_fuzzy = function(# Compute the fuzzy nucs.
909 6b5a528c Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting fuzzy regions. It will be take into account in the count read part og the pipeline.
910 6b5a528c Florent Chuffart
combi, ##<< The strain combination to consider.
911 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
912 6b5a528c Florent Chuffart
roi_index, ##<< The region of interest index.
913 6b5a528c Florent Chuffart
strain_maps, ##<< Nuc maps.
914 1d833b97 Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
915 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
916 6b5a528c Florent Chuffart
) {
917 6b5a528c Florent Chuffart
  print(roi_index)
918 6b5a528c Florent Chuffart
  PLOT = FALSE
919 6b5a528c Florent Chuffart
  tmp_common_nucs = common_nuc_results[[paste(combi[1], combi[2], sep="_")]]
920 6b5a528c Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$roi_index == roi_index, ]
921 6b5a528c Florent Chuffart
922 6b5a528c Florent Chuffart
  print(paste("Dealing with fuzzy from", combi[1]))
923 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
924 1d833b97 Florent Chuffart
  tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
925 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
926 6b5a528c Florent Chuffart
  agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)    
927 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
928 6b5a528c Florent Chuffart
    lines(c(agg_fuzzy_1[i,]$lower_bound, agg_fuzzy_1[i,]$upper_bound), c(+3.1,+3.1), col=2)
929 6b5a528c Florent Chuffart
  }
930 6b5a528c Florent Chuffart
931 6b5a528c Florent Chuffart
  print(paste("Dealing with fuzzy from ", combi[2]))
932 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
933 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
934 6b5a528c Florent Chuffart
  agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)      
935 1d833b97 Florent Chuffart
  agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
936 ddd6e221 Florent Chuffart
  tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi=roi, config=config)
937 1d833b97 Florent Chuffart
  tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
938 6b5a528c Florent Chuffart
  # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
939 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
940 6b5a528c Florent Chuffart
    lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
941 6b5a528c Florent Chuffart
  }
942 6b5a528c Florent Chuffart
  
943 6b5a528c Florent Chuffart
  print("Dealing with fuzzy from both...")
944 6b5a528c Florent Chuffart
  all_fuzzy = union_regions(rbind(agg_fuzzy_1, tr_agg_fuzzy_2))
945 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(all_fuzzy[,1])) {
946 6b5a528c Florent Chuffart
    lines(c(all_fuzzy[i,]$lower_bound, all_fuzzy[i,]$upper_bound), c(+3.2, +3.2), col=1)
947 6b5a528c Florent Chuffart
  }
948 6b5a528c Florent Chuffart
  
949 6b5a528c Florent Chuffart
  print(paste("Dealing with wp from", combi[1]))
950 6b5a528c Florent Chuffart
  wp_nucs_1 = extract_wp(strain_maps, roi_index, combi[1], tmp_common_nucs)
951 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(wp_nucs_1[,1])) {
952 6b5a528c Florent Chuffart
    lines(c(wp_nucs_1[i,]$lower_bound, wp_nucs_1[i,]$upper_bound), c(+3.5,+3.5), col=3)
953 6b5a528c Florent Chuffart
  }
954 6b5a528c Florent Chuffart
955 6b5a528c Florent Chuffart
  print(paste("Dealing with wp from", combi[2]))
956 6b5a528c Florent Chuffart
  wp_nucs_2 = extract_wp(strain_maps, roi_index, combi[2], tmp_common_nucs)
957 ddd6e221 Florent Chuffart
  tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, roi_index, roi=roi, config=config)
958 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(tr_wp_nucs_2[,1])) {
959 6b5a528c Florent Chuffart
    lines(c(tr_wp_nucs_2[i,]$lower_bound, tr_wp_nucs_2[i,]$upper_bound), c(+3.7,+3.7), col=3)
960 6b5a528c Florent Chuffart
  }
961 6b5a528c Florent Chuffart
962 6b5a528c Florent Chuffart
  print("Dealing with wp from both...")
963 6b5a528c Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
964 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(all_wp[,1])) {
965 6b5a528c Florent Chuffart
    lines(c(all_wp[i,]$lower_bound, all_wp[i,]$upper_bound), c(+3.6, +3.6), col=1)
966 6b5a528c Florent Chuffart
  }  
967 6b5a528c Florent Chuffart
968 6b5a528c Florent Chuffart
  print("Dealing with fuzzy and wp...")
969 6b5a528c Florent Chuffart
  non_inter_fuzzy = substract_region(all_fuzzy, all_wp)
970 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) { return(NULL) }
971 6b5a528c Florent Chuffart
  
972 6b5a528c Florent Chuffart
  non_inter_fuzzy$len = non_inter_fuzzy$upper_bound - non_inter_fuzzy$lower_bound
973 6b5a528c Florent Chuffart
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
974 6b5a528c Florent Chuffart
  if (PLOT) for (i in 1:length(non_inter_fuzzy[,1])) {
975 6b5a528c Florent Chuffart
    lines(c(non_inter_fuzzy[i,]$lower_bound, non_inter_fuzzy[i,]$upper_bound), c(+3.9, +3.9), col=1)
976 6b5a528c Florent Chuffart
  }
977 6b5a528c Florent Chuffart
  
978 6b5a528c Florent Chuffart
  non_inter_fuzzy$index_nuc = 1:length(non_inter_fuzzy[,1])
979 6b5a528c Florent Chuffart
  return (non_inter_fuzzy)
980 6b5a528c Florent Chuffart
}
981 6b5a528c Florent Chuffart
982 6b5a528c Florent Chuffart
983 6b5a528c Florent Chuffart
984 6b5a528c Florent Chuffart
get_all_reads = function(# Retrieve Reads 
985 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
986 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
987 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
988 6b5a528c Florent Chuffart
form="wp" ##<< The nuc form to considere.
989 6b5a528c Florent Chuffart
) {
990 6b5a528c Florent Chuffart
	all_reads = NULL
991 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
992 6b5a528c Florent Chuffart
    if (form == "fuzzy") {
993 6b5a528c Florent Chuffart
		  out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
994 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
995 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
996 6b5a528c Florent Chuffart
      tmp_res$form = form
997 6b5a528c Florent Chuffart
    } else if (form == "wp") {
998 6b5a528c Florent Chuffart
		 	out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
999 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1000 6b5a528c Florent Chuffart
      tmp_res$form = form
1001 6b5a528c Florent Chuffart
    } else if (form == "wpfuzzy") {
1002 6b5a528c Florent Chuffart
		 	out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1003 6b5a528c Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1004 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
1005 6b5a528c Florent Chuffart
		  out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
1006 6b5a528c Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)				
1007 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
1008 6b5a528c Florent Chuffart
      tmp_res2$form = "fuzzy"
1009 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
1010 6b5a528c Florent Chuffart
    }
1011 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
1012 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
1013 6b5a528c Florent Chuffart
		}     
1014 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
1015 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
1016 6b5a528c Florent Chuffart
  }
1017 6b5a528c Florent Chuffart
  return(all_reads)	
1018 6b5a528c Florent Chuffart
}
1019 6b5a528c Florent Chuffart
1020 6b5a528c Florent Chuffart
get_design = function(# Build the design for deseq
1021 6b5a528c Florent Chuffart
### This function build the design according sample properties.
1022 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1023 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
1024 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
1025 6b5a528c Florent Chuffart
) {
1026 6b5a528c Florent Chuffart
  off1 = 0
1027 6b5a528c Florent Chuffart
  off2 = 0
1028 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
1029 6b5a528c Florent Chuffart
	design_rownames = c()
1030 6b5a528c Florent Chuffart
	design_manip = c()
1031 6b5a528c Florent Chuffart
	design_strain = c()
1032 6b5a528c Florent Chuffart
  off2index = function(off) {	
1033 6b5a528c Florent Chuffart
  	switch(toString(off), 
1034 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
1035 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
1036 6b5a528c Florent Chuffart
    	"3"=c(1,1,0), 
1037 6b5a528c Florent Chuffart
  		c(1,1,1)
1038 6b5a528c Florent Chuffart
  		)	
1039 6b5a528c Florent Chuffart
  }
1040 6b5a528c Florent Chuffart
	for (manip in manips) {
1041 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
1042 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
1043 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
1044 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
1045 6b5a528c Florent Chuffart
		} else {
1046 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1047 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1048 6b5a528c Florent Chuffart
			}
1049 6b5a528c Florent Chuffart
		}
1050 6b5a528c Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))				
1051 6b5a528c Florent Chuffart
		for (strain in combi) {			
1052 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1053 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1054 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1055 6b5a528c Florent Chuffart
		}
1056 6b5a528c Florent Chuffart
	}
1057 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1058 6b5a528c Florent Chuffart
	return(snep_design)	
1059 6b5a528c Florent Chuffart
}
1060 6b5a528c Florent Chuffart
1061 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1062 6b5a528c Florent Chuffart
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1063 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1064 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1065 6b5a528c Florent Chuffart
res, ##<< Data
1066 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1067 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1068 6b5a528c Florent Chuffart
) {			
1069 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1070 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1071 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1072 6b5a528c Florent Chuffart
		row.names = cols,
1073 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1074 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1075 6b5a528c Florent Chuffart
		)
1076 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1077 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1078 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1079 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1080 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1081 6b5a528c Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))				
1082 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1083 6b5a528c Florent Chuffart
	}
1084 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1085 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1086 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1087 6b5a528c Florent Chuffart
	}
1088 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1089 6b5a528c Florent Chuffart
}
1090 6b5a528c Florent Chuffart
1091 6b5a528c Florent Chuffart
analyse_design = function(# Launch deseq methods.
1092 6b5a528c Florent Chuffart
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1093 6b5a528c Florent Chuffart
snep_design, ##<< The design to considere.
1094 6b5a528c Florent Chuffart
reads ##<< The data to considere. 
1095 6b5a528c Florent Chuffart
) {
1096 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1097 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1098 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1099 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1100 6b5a528c Florent Chuffart
1101 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1102 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1103 6b5a528c Florent Chuffart
1104 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1105 6b5a528c Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))	
1106 6b5a528c Florent Chuffart
}
1107 6b5a528c Florent Chuffart
1108 6b5a528c Florent Chuffart
1109 6b5a528c Florent Chuffart
1110 6b5a528c Florent Chuffart
1111 6b5a528c Florent Chuffart
1112 6b5a528c Florent Chuffart
1113 6b5a528c Florent Chuffart
1114 6b5a528c Florent Chuffart
1115 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1116 6b5a528c Florent Chuffart
### This function uses 
1117 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1118 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1119 6b5a528c Florent Chuffart
form, ##<< the nuc form involved. 
1120 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
1121 6b5a528c Florent Chuffart
) {
1122 6b5a528c Florent Chuffart
  # PRETREAT
1123 6b5a528c Florent Chuffart
  d = get_design(marker, combi, all_samples)
1124 6b5a528c Florent Chuffart
  reads = get_all_reads(marker, combi, form)
1125 6b5a528c Florent Chuffart
  # RUN ANALYSE
1126 6b5a528c Florent Chuffart
  tmp_analyse = analyse_design(d, reads)
1127 6b5a528c Florent Chuffart
  # RESULTS
1128 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1129 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1130 6b5a528c Florent Chuffart
  k = names(fit1)
1131 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1132 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1133 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1134 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1135 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1136 6b5a528c Florent Chuffart
  print(snep_design)
1137 6b5a528c Florent Chuffart
	fdr = 0.0001
1138 6b5a528c Florent Chuffart
	thres = FDR(reads$pvalsGLM, fdr) 
1139 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1140 6b5a528c Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1141 6b5a528c Florent Chuffart
  return(reads)
1142 6b5a528c Florent Chuffart
  },  ex=function(){	
1143 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1144 6b5a528c Florent Chuffart
    combi = c("BY", "YJM") 
1145 6b5a528c Florent Chuffart
    form = "wpfuzzy" # "wp" | "fuzzy" | "wpfuzzy"
1146 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1147 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1148 6b5a528c Florent Chuffart
})
1149 6b5a528c Florent Chuffart
1150 6b5a528c Florent Chuffart
1151 6b5a528c Florent Chuffart
1152 6b5a528c Florent Chuffart
1153 6b5a528c Florent Chuffart
1154 6b5a528c Florent Chuffart
1155 6b5a528c Florent Chuffart
1156 6b5a528c Florent Chuffart
1157 6b5a528c Florent Chuffart
1158 6b5a528c Florent Chuffart
1159 6b5a528c Florent Chuffart
1160 6b5a528c Florent Chuffart
1161 6b5a528c Florent Chuffart
1162 6b5a528c Florent Chuffart
1163 6b5a528c Florent Chuffart
1164 6b5a528c Florent Chuffart
1165 6b5a528c Florent Chuffart
1166 6b5a528c Florent Chuffart
1167 6b5a528c Florent Chuffart
1168 6b5a528c Florent Chuffart
1169 6b5a528c Florent Chuffart
1170 6b5a528c Florent Chuffart
1171 6b5a528c Florent Chuffart
1172 6b5a528c Florent Chuffart
1173 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1174 1d833b97 Florent Chuffart
### Util to convert Roman to Arabic
1175 1d833b97 Florent Chuffart
){list(
1176 1d833b97 Florent Chuffart
  "I" = 1,
1177 1d833b97 Florent Chuffart
  "II" = 2,
1178 1d833b97 Florent Chuffart
  "III" = 3,
1179 1d833b97 Florent Chuffart
  "IV" = 4,
1180 1d833b97 Florent Chuffart
  "V" = 5,
1181 1d833b97 Florent Chuffart
  "VI" = 6,
1182 1d833b97 Florent Chuffart
  "VII" = 7,
1183 1d833b97 Florent Chuffart
  "VIII" = 8,
1184 1d833b97 Florent Chuffart
  "IX" = 9,
1185 1d833b97 Florent Chuffart
  "X" = 10,
1186 1d833b97 Florent Chuffart
  "XI" = 11,
1187 1d833b97 Florent Chuffart
  "XII" = 12,
1188 1d833b97 Florent Chuffart
  "XIII" = 13,
1189 1d833b97 Florent Chuffart
  "XIV" = 14,
1190 1d833b97 Florent Chuffart
  "XV" = 15,
1191 1d833b97 Florent Chuffart
  "XVI" = 16,
1192 1d833b97 Florent Chuffart
  "XVII" = 17,
1193 1d833b97 Florent Chuffart
  "XVIII" = 18,
1194 1d833b97 Florent Chuffart
  "XIX" = 19,
1195 1d833b97 Florent Chuffart
  "XX" = 20
1196 1d833b97 Florent Chuffart
)}
1197 1d833b97 Florent Chuffart
1198 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1199 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1200 1d833b97 Florent Chuffart
l ##<< The pairlist to switch. 
1201 1d833b97 Florent Chuffart
) {
1202 1d833b97 Florent Chuffart
	ret = list()
1203 1d833b97 Florent Chuffart
	for (name in names(l)) {
1204 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1205 1d833b97 Florent Chuffart
	}
1206 1d833b97 Florent Chuffart
	ret
1207 1d833b97 Florent Chuffart
### The switched pairlist.
1208 1d833b97 Florent Chuffart
}, ex=function(){
1209 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1210 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1211 1d833b97 Florent Chuffart
})
1212 1d833b97 Florent Chuffart
1213 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1214 1d833b97 Florent Chuffart
### Util to convert Arabicto Roman
1215 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1216 1d833b97 Florent Chuffart
1217 1d833b97 Florent Chuffart
1218 1d833b97 Florent Chuffart
1219 1d833b97 Florent Chuffart
1220 1d833b97 Florent Chuffart
translate_roi = structure(function(# Translate coords of a genome region.
1221 1d833b97 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1222 1d833b97 Florent Chuffart
roi, ##<< Original genome region of interest.
1223 1d833b97 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1224 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1225 1d833b97 Florent Chuffart
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1226 1d833b97 Florent Chuffart
) {
1227 1d833b97 Florent Chuffart
	strain1 = roi$strain_ref
1228 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1229 1d833b97 Florent Chuffart
	if (strain1 == strain2) {
1230 1d833b97 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1231 1d833b97 Florent Chuffart
		return(roi)
1232 1d833b97 Florent Chuffart
	}
1233 1d833b97 Florent Chuffart
	# Launch c2c file
1234 1d833b97 Florent Chuffart
	if (reverse) {
1235 1d833b97 Florent Chuffart
		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1236 1d833b97 Florent Chuffart
	} else {
1237 1d833b97 Florent Chuffart
		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1238 1d833b97 Florent Chuffart
	}
1239 1d833b97 Florent Chuffart
	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1240 1d833b97 Florent Chuffart
	# filtering it
1241 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1242 1d833b97 Florent Chuffart
	# Reverse
1243 1d833b97 Florent Chuffart
	if (reverse) {
1244 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1245 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1246 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1247 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1248 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1249 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1250 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1251 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1252 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1253 1d833b97 Florent Chuffart
	}
1254 1d833b97 Florent Chuffart
	# Restrict c2c to big_roi
1255 1d833b97 Florent Chuffart
	# if (FALSE) {
1256 1d833b97 Florent Chuffart
	if (!is.null(big_roi)) {
1257 62d69d53 Florent Chuffart
		if (roi$strain_ref != big_roi$strain_ref) {
1258 1e34bb1f Florent Chuffart
      # print("WARNING, big_roi and roi not in the same strain_ref... translating big_roi.")
1259 62d69d53 Florent Chuffart
      big_roi = translate_roi(big_roi, roi$strain_ref, config=config)
1260 62d69d53 Florent Chuffart
    }
1261 62d69d53 Florent Chuffart
    if (big_roi$end < big_roi$begin) {
1262 62d69d53 Florent Chuffart
      tmp_var = big_roi$begin
1263 62d69d53 Florent Chuffart
      big_roi$begin = big_roi$end 
1264 62d69d53 Florent Chuffart
      big_roi$end = tmp_var        
1265 62d69d53 Florent Chuffart
      big_roi$length = big_roi$length        
1266 62d69d53 Florent Chuffart
    }    
1267 ded736c8 Florent Chuffart
    if (big_roi$chr!=roi$chr | roi$end > big_roi$end | roi$end < big_roi$begin | roi$begin > big_roi$end | roi$begin < big_roi$begin) {
1268 ded736c8 Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_roi.")
1269 ded736c8 Florent Chuffart
      return(NULL)
1270 ded736c8 Florent Chuffart
    }
1271 62d69d53 Florent Chuffart
		if (strain1 == "BY") {
1272 62d69d53 Florent Chuffart
			big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="")
1273 62d69d53 Florent Chuffart
		} else if (strain1 == "RM") {			
1274 62d69d53 Florent Chuffart
		  big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="")
1275 62d69d53 Florent Chuffart
		} else if (strain1 == "YJM") {			
1276 62d69d53 Florent Chuffart
		  big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]]
1277 1d833b97 Florent Chuffart
		}
1278 62d69d53 Florent Chuffart
		big_begin_1 = big_roi$begin
1279 62d69d53 Florent Chuffart
	  big_end_1 = big_roi$end
1280 62d69d53 Florent Chuffart
		c2c = c2c[c2c$V1==big_chro_1,]
1281 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3,c("V2", "V3") ] = big_begin_1}
1282 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, c("V2", "V3")] = big_end_1}
1283 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2,c("V2", "V3") ] = big_begin_1}
1284 62d69d53 Florent Chuffart
    if (length(c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, c("V2", "V3")] = big_end_1}
1285 62d69d53 Florent Chuffart
    c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1286 1d833b97 Florent Chuffart
	}
1287 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1288 1d833b97 Florent Chuffart
	if (strain1 == "BY") {
1289 1d833b97 Florent Chuffart
		chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="")
1290 1d833b97 Florent Chuffart
	} else if (strain1 == "RM") {			
1291 1d833b97 Florent Chuffart
	  chro_1 = paste("supercontig_1.",roi$chr,sep="")
1292 1d833b97 Florent Chuffart
	} else if (strain1 == "YJM") {			
1293 1d833b97 Florent Chuffart
	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]]
1294 1d833b97 Florent Chuffart
	}
1295 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1296 1d833b97 Florent Chuffart
  end_1 = roi$end
1297 1d833b97 Florent Chuffart
  # Computing equivalent strain_2 alignment coordinates	
1298 1d833b97 Florent Chuffart
	if (reverse) {
1299 1d833b97 Florent Chuffart
  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1300 1d833b97 Florent Chuffart
    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1301 1d833b97 Florent Chuffart
	} else {
1302 1d833b97 Florent Chuffart
		tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1303 1d833b97 Florent Chuffart
	  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1304 1d833b97 Florent Chuffart
	}
1305 1d833b97 Florent Chuffart
	# Never happend conditions ...	
1306 1d833b97 Florent Chuffart
	{
1307 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1308 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1309 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1310 54cc8283 Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
1311 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1312 54cc8283 Florent Chuffart
      } else {
1313 5a26597a Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1,]
1314 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1315 54cc8283 Florent Chuffart
      }
1316 1d833b97 Florent Chuffart
			if (reverse) {
1317 1d833b97 Florent Chuffart
		  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1318 1d833b97 Florent Chuffart
			} else {
1319 1d833b97 Florent Chuffart
				tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1320 1d833b97 Florent Chuffart
			}
1321 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1322 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1323 1d833b97 Florent Chuffart
					return(NULL)
1324 1d833b97 Florent Chuffart
					tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
1325 1d833b97 Florent Chuffart
				} else { 
1326 1d833b97 Florent Chuffart
					# return(NULL)
1327 1d833b97 Florent Chuffart
					# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,])
1328 1d833b97 Florent Chuffart
					# print(c2c[c2c$V1==chro_1,])
1329 1d833b97 Florent Chuffart
					print(tmptransfostart)
1330 1d833b97 Florent Chuffart
					print(tmptransfostop)
1331 1d833b97 Florent Chuffart
					stop("Never happend condition 1.")					
1332 1d833b97 Florent Chuffart
				}
1333 1d833b97 Florent Chuffart
			}
1334 1d833b97 Florent Chuffart
		} 
1335 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1336 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1337 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1338 54cc8283 Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
1339 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1340 54cc8283 Florent Chuffart
      } else {
1341 5a26597a Florent Chuffart
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2<=end_1,]
1342 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1343 54cc8283 Florent Chuffart
      }
1344 1d833b97 Florent Chuffart
			if (reverse) {
1345 1d833b97 Florent Chuffart
		    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1346 1d833b97 Florent Chuffart
			} else {
1347 1d833b97 Florent Chuffart
			  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1348 1d833b97 Florent Chuffart
			}
1349 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1350 1d833b97 Florent Chuffart
				if (!is.null(big_roi)) {
1351 1d833b97 Florent Chuffart
					return(NULL)
1352 1d833b97 Florent Chuffart
				  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
1353 1d833b97 Florent Chuffart
				} else {
1354 1d833b97 Florent Chuffart
					# return(NULL)
1355 1d833b97 Florent Chuffart
					print(c2c[c2c$V1==chro_1,])
1356 1d833b97 Florent Chuffart
					print(tmptransfostart)
1357 1d833b97 Florent Chuffart
					print(tmptransfostop)
1358 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1359 1d833b97 Florent Chuffart
				} 
1360 1d833b97 Florent Chuffart
			} 
1361 1d833b97 Florent Chuffart
		}
1362 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1363 1d833b97 Florent Chuffart
			# tmptransfostart = tmptransfostart[1,]
1364 1d833b97 Florent Chuffart
			# print("many start")
1365 1d833b97 Florent Chuffart
			# print(c2c[c2c$V1==chro_1,])
1366 1d833b97 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1367 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1368 1d833b97 Florent Chuffart
				# return(NULL)
1369 1d833b97 Florent Chuffart
				print(tmptransfostart)
1370 1d833b97 Florent Chuffart
				print(tmptransfostop)
1371 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1372 1d833b97 Florent Chuffart
			}
1373 1d833b97 Florent Chuffart
		}
1374 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1375 1d833b97 Florent Chuffart
			# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),]
1376 1d833b97 Florent Chuffart
			# print("many stop")
1377 1d833b97 Florent Chuffart
			# print(tmptransfostop)
1378 1d833b97 Florent Chuffart
			# print(roi)
1379 1d833b97 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1380 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1381 1d833b97 Florent Chuffart
				# return(NULL)
1382 1d833b97 Florent Chuffart
				print(tmptransfostart)
1383 1d833b97 Florent Chuffart
				print(tmptransfostop)
1384 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1385 1d833b97 Florent Chuffart
			}
1386 1d833b97 Florent Chuffart
		}		
1387 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1388 1d833b97 Florent Chuffart
			print(tmptransfostart)
1389 1d833b97 Florent Chuffart
			print(tmptransfostop)		
1390 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1391 1d833b97 Florent Chuffart
		}
1392 1d833b97 Florent Chuffart
	} 
1393 1d833b97 Florent Chuffart
  # Deal with strand
1394 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1395 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1396 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1397 1d833b97 Florent Chuffart
  } else {
1398 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1399 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1400 1d833b97 Florent Chuffart
  }
1401 1d833b97 Florent Chuffart
	# Build returned roi
1402 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1403 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1404 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1405 1d833b97 Florent Chuffart
	} else {
1406 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1407 1d833b97 Florent Chuffart
	}
1408 1d833b97 Florent Chuffart
  roi$begin = begin_2
1409 1d833b97 Florent Chuffart
  roi$end = end_2
1410 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1411 1d833b97 Florent Chuffart
		roi$length = 1			
1412 1d833b97 Florent Chuffart
	} else {		
1413 1d833b97 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1414 1d833b97 Florent Chuffart
	}
1415 1d833b97 Florent Chuffart
  return(roi)  
1416 1d833b97 Florent Chuffart
}, ex=function(){
1417 1d833b97 Florent Chuffart
	# Define new translate_roi function...
1418 1d833b97 Florent Chuffart
	translate_roi = function(roi, strain2, config) {
1419 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1420 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1421 1d833b97 Florent Chuffart
			return(roi)
1422 1d833b97 Florent Chuffart
		} else {
1423 1d833b97 Florent Chuffart
		  stop("Here is my new translate_roi function...")		
1424 1d833b97 Florent Chuffart
		}	
1425 1d833b97 Florent Chuffart
	}
1426 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1427 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", as.environment("package:nm"))
1428 1d833b97 Florent Chuffart
	# unlockBinding("translate_roi", getNamespace("nm"))
1429 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, "package:nm")
1430 1d833b97 Florent Chuffart
	# assign("translate_roi", translate_roi, getNamespace("nm"))
1431 1d833b97 Florent Chuffart
	# lockBinding("translate_roi", getNamespace("nm"))
1432 1d833b97 Florent Chuffart
	# lockBinding("translate_roi", as.environment("package:nm"))	
1433 1d833b97 Florent Chuffart
})
1434 1d833b97 Florent Chuffart
1435 1d833b97 Florent Chuffart
1436 1d833b97 Florent Chuffart
1437 1d833b97 Florent Chuffart
1438 1d833b97 Florent Chuffart
1439 1d833b97 Florent Chuffart
1440 1d833b97 Florent Chuffart
1441 1d833b97 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1442 1d833b97 Florent Chuffart
### CURs are regions that can be aligned between the genomes 
1443 1d833b97 Florent Chuffart
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
1444 1d833b97 Florent Chuffart
min_cur_width = 200, ##<< The minimum width of a CUR
1445 8e9facd8 Florent Chuffart
config = NULL, ##<< GLOBAL config variable
1446 1d833b97 Florent Chuffart
plot = FALSE ##<< Plot CURs or not
1447 1d833b97 Florent Chuffart
) {
1448 1d833b97 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
1449 8e9facd8 Florent Chuffart
  	c2c = get_content(config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]], "table", stringsAsFactors=FALSE)
1450 1d833b97 Florent Chuffart
    # Filtering unagapped
1451 1d833b97 Florent Chuffart
    c2c = c2c[c2c$V6=="-",]
1452 1d833b97 Florent Chuffart
    # filtering some things (chr...)
1453 1d833b97 Florent Chuffart
    # c2c = c2c[c2c$V1 == "chrIV",]
1454 1d833b97 Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] 
1455 1d833b97 Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] 
1456 1d833b97 Florent Chuffart
  	# Plot diffs to define a threshold (diff_allowed)
1457 1d833b97 Florent Chuffart
  	# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
1458 1d833b97 Florent Chuffart
    # Filtering
1459 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1460 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1461 1d833b97 Florent Chuffart
1462 1d833b97 Florent Chuffart
    rois = NULL	
1463 1d833b97 Florent Chuffart
  	for(i in 1:length(indexes_start)) {
1464 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1465 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1466 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1467 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1468 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1469 1d833b97 Florent Chuffart
  		} else {			
1470 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1471 1d833b97 Florent Chuffart
  		}
1472 1d833b97 Florent Chuffart
  		roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
1473 1d833b97 Florent Chuffart
  		roi[["length"]] = roi$end - roi$begin
1474 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1475 1d833b97 Florent Chuffart
        rois = dfadd(rois,roi)
1476 1d833b97 Florent Chuffart
  	  }
1477 1d833b97 Florent Chuffart
  		if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1478 1d833b97 Florent Chuffart
  			print("*************** ERROR, non homogenous region! ********************")
1479 1d833b97 Florent Chuffart
  		}
1480 1d833b97 Florent Chuffart
  		# print(i)
1481 1d833b97 Florent Chuffart
  		# print(roi)
1482 1d833b97 Florent Chuffart
  		# print(sub_c2c)
1483 1d833b97 Florent Chuffart
  		# print("________________________________________________________________")
1484 1d833b97 Florent Chuffart
  	}
1485 1d833b97 Florent Chuffart
  	if (plot) {
1486 1d833b97 Florent Chuffart
  		print(paste(length(indexes_stop), "area of interest."))
1487 1d833b97 Florent Chuffart
  	  # Plot rois
1488 8e9facd8 Florent Chuffart
  	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[strain1]], "fasta")   
1489 1d833b97 Florent Chuffart
  		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1490 1d833b97 Florent Chuffart
  		for (name in names(genome)) {
1491 1d833b97 Florent Chuffart
  			if (strain1 == "BY") {
1492 1d833b97 Florent Chuffart
  				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
1493 1d833b97 Florent Chuffart
  			} else {			
1494 1d833b97 Florent Chuffart
  				chr_ref = name
1495 1d833b97 Florent Chuffart
  			}
1496 1d833b97 Florent Chuffart
  			y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
1497 1d833b97 Florent Chuffart
  			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1498 1d833b97 Florent Chuffart
  			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1499 1d833b97 Florent Chuffart
  		}
1500 1d833b97 Florent Chuffart
  	  col=1
1501 1d833b97 Florent Chuffart
  	  for (roi_index in 1:length(rois$chr)) {
1502 1d833b97 Florent Chuffart
  			roi = rois[roi_index,]	
1503 1d833b97 Florent Chuffart
  			y_lev = as.integer(roi$chr) + 0.3
1504 1d833b97 Florent Chuffart
  			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1505 1d833b97 Florent Chuffart
  			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1506 1d833b97 Florent Chuffart
  	  	col = col + 1
1507 1d833b97 Florent Chuffart
  	  }	
1508 1d833b97 Florent Chuffart
  	}
1509 1d833b97 Florent Chuffart
  	return(rois)
1510 1d833b97 Florent Chuffart
  }
1511 1d833b97 Florent Chuffart
	rois = NULL	
1512 1d833b97 Florent Chuffart
	rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1513 1d833b97 Florent Chuffart
	rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1514 1d833b97 Florent Chuffart
	for (roi_1_index in 1:length(rois_BY_RM[,1])) {
1515 1d833b97 Florent Chuffart
		roi_1 = rois_BY_RM[roi_1_index,]
1516 1d833b97 Florent Chuffart
		roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;  
1517 1d833b97 Florent Chuffart
		# print(length(roi_2_candidates[,1]))
1518 1d833b97 Florent Chuffart
		if (length(roi_2_candidates[,1]) > 0) {
1519 1d833b97 Florent Chuffart
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1520 1d833b97 Florent Chuffart
				roi_2 = roi_2_candidates[roi_2_index,]
1521 1d833b97 Florent Chuffart
				roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
1522 1d833b97 Florent Chuffart
				roi[["length"]] = roi$end - roi$begin + 1
1523 1d833b97 Florent Chuffart
				if (roi$length >= min_cur_width) {
1524 1d833b97 Florent Chuffart
					# if (length(rois[,1]) == 153) {
1525 1d833b97 Florent Chuffart
					# 	print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
1526 1d833b97 Florent Chuffart
					# 	print(roi_1)
1527 1d833b97 Florent Chuffart
					# 	print(roi_2)
1528 1d833b97 Florent Chuffart
					# 	print(roi)
1529 1d833b97 Florent Chuffart
					# }
1530 1d833b97 Florent Chuffart
			    rois = dfadd(rois,roi)
1531 1d833b97 Florent Chuffart
			  }			
1532 1d833b97 Florent Chuffart
			}
1533 1d833b97 Florent Chuffart
		}
1534 1d833b97 Florent Chuffart
	}
1535 1d833b97 Florent Chuffart
	print(length(rois[,1]))
1536 1d833b97 Florent Chuffart
	print(sum(rois$length))
1537 1d833b97 Florent Chuffart
	rois_1st_round = rois
1538 1d833b97 Florent Chuffart
	rois_2nd_round = NULL	
1539 1d833b97 Florent Chuffart
	rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1540 1d833b97 Florent Chuffart
	for (roi_1_index in 1:length(rois_1st_round[,1])) {
1541 1d833b97 Florent Chuffart
		roi_1 = rois_1st_round[roi_1_index,]
1542 8e9facd8 Florent Chuffart
		translated_roi_1 = translate_roi(roi_1, "RM", config = config)
1543 1d833b97 Florent Chuffart
		t_b = min(translated_roi_1$begin, translated_roi_1$end)
1544 1d833b97 Florent Chuffart
		t_e = max(translated_roi_1$begin, translated_roi_1$end)
1545 1d833b97 Florent Chuffart
		roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;  
1546 1d833b97 Florent Chuffart
		if (length(roi_2_candidates[,1]) > 0) {
1547 1d833b97 Florent Chuffart
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1548 1d833b97 Florent Chuffart
				roi_2 = roi_2_candidates[roi_2_index,]
1549 1d833b97 Florent Chuffart
				roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
1550 1d833b97 Florent Chuffart
				roi[["length"]] = roi$end - roi$begin + 1
1551 1d833b97 Florent Chuffart
				if (roi$length >= min_cur_width) {
1552 1d833b97 Florent Chuffart
			    rois_2nd_round = dfadd(rois_2nd_round,roi)
1553 1d833b97 Florent Chuffart
			  }			
1554 1d833b97 Florent Chuffart
			}
1555 1d833b97 Florent Chuffart
		}
1556 1d833b97 Florent Chuffart
	}
1557 1d833b97 Florent Chuffart
	print(length(rois_2nd_round[,1]))
1558 1d833b97 Florent Chuffart
	print(sum(rois_2nd_round$length))
1559 1d833b97 Florent Chuffart
	rois = rois_2nd_round
1560 1d833b97 Florent Chuffart
1561 1d833b97 Florent Chuffart
	rois_translator_round = list()	
1562 1d833b97 Florent Chuffart
	for (roi_index in 1:length(rois[,1])) {
1563 1d833b97 Florent Chuffart
		roi = rois[roi_index,]
1564 8e9facd8 Florent Chuffart
		BY_roi  = translate_roi(roi, "BY", config = config)
1565 1d833b97 Florent Chuffart
		tmp_BY_roi = BY_roi 
1566 1d833b97 Florent Chuffart
		BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1567 1d833b97 Florent Chuffart
		BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1568 1d833b97 Florent Chuffart
		BY_roi$length = abs(BY_roi$length)
1569 1d833b97 Florent Chuffart
		rois_translator_round = dfadd(rois_translator_round, BY_roi)
1570 1d833b97 Florent Chuffart
	}
1571 1d833b97 Florent Chuffart
	rois = rois_translator_round
1572 1d833b97 Florent Chuffart
1573 1d833b97 Florent Chuffart
	rois_3rd_round = NULL	
1574 1d833b97 Florent Chuffart
	for (roi_index in 1:length(rois[,1])) {
1575 1d833b97 Florent Chuffart
	# for (roi_index in 1:2) {
1576 1d833b97 Florent Chuffart
		current_roi = rois[roi_index,]
1577 1d833b97 Florent Chuffart
		# print(roi_index)
1578 1d833b97 Florent Chuffart
	
1579 1d833b97 Florent Chuffart
	  to_be_check_rois = dfadd(NULL, current_roi)
1580 1d833b97 Florent Chuffart
		NEED_RERUN = TRUE
1581 1d833b97 Florent Chuffart
		while (NEED_RERUN) {
1582 1d833b97 Florent Chuffart
			# print("RERUN"),
1583 1d833b97 Florent Chuffart
			NEED_RERUN = FALSE
1584 1d833b97 Florent Chuffart
			to_be_check_again = NULL
1585 1d833b97 Florent Chuffart
			for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
1586 1d833b97 Florent Chuffart
				# print(to_be_check_rois)
1587 1d833b97 Florent Chuffart
				to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
1588 1d833b97 Florent Chuffart
		    combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
1589 1d833b97 Florent Chuffart
			  for (combi in combis) {
1590 1d833b97 Florent Chuffart
					# print(combi)
1591 1d833b97 Florent Chuffart
			    strain1 = combi[1]
1592 1d833b97 Florent Chuffart
		      strain2 = combi[2]
1593 8e9facd8 Florent Chuffart
					trans_roi = translate_roi(to_be_check_roi, strain1, config = config)
1594 1d833b97 Florent Chuffart
					lower_bound=min(trans_roi$begin, trans_roi$end)
1595 1d833b97 Florent Chuffart
					upper_bound=max(trans_roi$begin, trans_roi$end)			
1596 1d833b97 Florent Chuffart
          
1597 1d833b97 Florent Chuffart
          check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
1598 1d833b97 Florent Chuffart
            reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1599 1d833b97 Florent Chuffart
          	if (strain1 == strain2) {
1600 1d833b97 Florent Chuffart
          		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1601 1d833b97 Florent Chuffart
          		return(roi)
1602 1d833b97 Florent Chuffart
          	}
1603 1d833b97 Florent Chuffart
          	# Launch c2c file
1604 1d833b97 Florent Chuffart
          	if (reverse) {
1605 1d833b97 Florent Chuffart
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1606 1d833b97 Florent Chuffart
          	} else {
1607 1d833b97 Florent Chuffart
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1608 1d833b97 Florent Chuffart
          	}
1609 1d833b97 Florent Chuffart
          	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1610 1d833b97 Florent Chuffart
          	# filtering it
1611 1d833b97 Florent Chuffart
            c2c = c2c[c2c$V6=="-",]
1612 1d833b97 Florent Chuffart
          	# Reverse
1613 1d833b97 Florent Chuffart
          	if (reverse) {
1614 1d833b97 Florent Chuffart
          		tmp_col = c2c$V1
1615 1d833b97 Florent Chuffart
          		c2c$V1 = c2c$V7
1616 1d833b97 Florent Chuffart
          		c2c$V7 = tmp_col
1617 1d833b97 Florent Chuffart
          		tmp_col = c2c$V2
1618 1d833b97 Florent Chuffart
          		c2c$V2 = c2c$V9
1619 1d833b97 Florent Chuffart
          		c2c$V9 = tmp_col
1620 1d833b97 Florent Chuffart
          		tmp_col = c2c$V3
1621 1d833b97 Florent Chuffart
          		c2c$V3 = c2c$V10
1622 1d833b97 Florent Chuffart
          		c2c$V10 = tmp_col
1623 1d833b97 Florent Chuffart
          	}
1624 1d833b97 Florent Chuffart
1625 1d833b97 Florent Chuffart
          	if (strain1 == "BY") {
1626 1d833b97 Florent Chuffart
          		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1627 1d833b97 Florent Chuffart
          	} else if (strain1 == "RM") {			
1628 1d833b97 Florent Chuffart
          	  chro_1 = paste("supercontig_1.",chr,sep="")
1629 1d833b97 Florent Chuffart
          	} else if (strain1 == "YJM") {			
1630 1d833b97 Florent Chuffart
          	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1631 1d833b97 Florent Chuffart
          	}
1632 1d833b97 Florent Chuffart
          	# print(chro_1)
1633 1d833b97 Florent Chuffart
          	if (!is.null(lower_bound) & !is.null(upper_bound)) {
1634 1d833b97 Florent Chuffart
              if (reverse) {
1635 1d833b97 Florent Chuffart
          	  	tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
1636 1d833b97 Florent Chuffart
          		} else {
1637 1d833b97 Florent Chuffart
          			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
1638 1d833b97 Florent Chuffart
          		}
1639 1d833b97 Florent Chuffart
          	} else {
1640 1d833b97 Florent Chuffart
            	tmp_c2c = c2c[c2c$V1 == chr,]
1641 1d833b97 Florent Chuffart
          	}
1642 1d833b97 Florent Chuffart
          	if (length(tmp_c2c[,1]) > 1) {
1643 1d833b97 Florent Chuffart
          		pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
1644 1d833b97 Florent Chuffart
          			# print(paste(i, "/", length(tmp_c2c[,1])))
1645 1d833b97 Florent Chuffart
          			apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
1646 1d833b97 Florent Chuffart
          				l1 = tmp_c2c[i,] 
1647 1d833b97 Florent Chuffart
          				b1 = min(l1$V2, l1$V3)
1648 1d833b97 Florent Chuffart
          				e1 = max(l1$V2, l1$V3)
1649 1d833b97 Florent Chuffart
          				l2 = tmp_c2c[j,]
1650 1d833b97 Florent Chuffart
          				b2 = min(l2$V2, l2$V3)
1651 1d833b97 Florent Chuffart
          				e2 = max(l2$V2, l2$V3)
1652 1d833b97 Florent Chuffart
          				if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
1653 1d833b97 Florent Chuffart
          					print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))				
1654 1d833b97 Florent Chuffart
          					pb = list(strain1, strain2, chr, b1, e1, b2, e2)					
1655 1d833b97 Florent Chuffart
          					pb
1656 1d833b97 Florent Chuffart
          				} else {
1657 1d833b97 Florent Chuffart
          					NULL
1658 1d833b97 Florent Chuffart
          				}
1659 1d833b97 Florent Chuffart
          			})
1660 1d833b97 Florent Chuffart
          		})
1661 1d833b97 Florent Chuffart
          		return(pbs)
1662 1d833b97 Florent Chuffart
          	}
1663 1d833b97 Florent Chuffart
1664 1d833b97 Florent Chuffart
          }, ex=function(){
1665 1d833b97 Florent Chuffart
          	source("src/nucleo_miner/yeast_strain_conversion.R"); 
1666 1d833b97 Florent Chuffart
          	pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
1667 1d833b97 Florent Chuffart
          	pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
1668 1d833b97 Florent Chuffart
          	pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
1669 1d833b97 Florent Chuffart
          	pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
1670 1d833b97 Florent Chuffart
          	pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
1671 1d833b97 Florent Chuffart
          	pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
1672 1d833b97 Florent Chuffart
          })
1673 1d833b97 Florent Chuffart
          
1674 1d833b97 Florent Chuffart
1675 1d833b97 Florent Chuffart
					res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) 
1676 1d833b97 Florent Chuffart
					if (!is.null(res)) {
1677 1d833b97 Florent Chuffart
						df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)		
1678 1d833b97 Florent Chuffart
						interval = df_res[1,]
1679 1d833b97 Florent Chuffart
						inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
1680 1d833b97 Florent Chuffart
						inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
1681 1d833b97 Florent Chuffart
						# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))			
1682 1d833b97 Florent Chuffart
						new_roi1 = trans_roi
1683 1d833b97 Florent Chuffart
						new_roi2 = trans_roi
1684 1d833b97 Florent Chuffart
						new_roi1$begin = lower_bound
1685 1d833b97 Florent Chuffart
						new_roi1$end = inter_min - 1
1686 1d833b97 Florent Chuffart
						new_roi1$length = new_roi1$end - new_roi1$begin + 1
1687 1d833b97 Florent Chuffart
						new_roi2$begin = inter_max + 1
1688 1d833b97 Florent Chuffart
						new_roi2$end = upper_bound
1689 1d833b97 Florent Chuffart
						new_roi2$length = new_roi2$end - new_roi2$begin + 1
1690 1d833b97 Florent Chuffart
						if (new_roi1$length > min_cur_width) {
1691 8e9facd8 Florent Chuffart
							BY_roi  = translate_roi(new_roi1, "BY", config = config)
1692 1d833b97 Florent Chuffart
							tmp_BY_roi = BY_roi
1693 1d833b97 Florent Chuffart
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1694 1d833b97 Florent Chuffart
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1695 1d833b97 Florent Chuffart
							BY_roi$length = abs(BY_roi$length)
1696 1d833b97 Florent Chuffart
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1697 1d833b97 Florent Chuffart
						}
1698 1d833b97 Florent Chuffart
						if (new_roi2$length > min_cur_width) {
1699 8e9facd8 Florent Chuffart
							BY_roi  = translate_roi(new_roi2, "BY", config = config)
1700 1d833b97 Florent Chuffart
							tmp_BY_roi = BY_roi
1701 1d833b97 Florent Chuffart
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1702 1d833b97 Florent Chuffart
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1703 1d833b97 Florent Chuffart
							BY_roi$length = abs(BY_roi$length)
1704 1d833b97 Florent Chuffart
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1705 1d833b97 Florent Chuffart
						}
1706 1d833b97 Florent Chuffart
						if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
1707 1d833b97 Florent Chuffart
							for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
1708 1d833b97 Florent Chuffart
								to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])								
1709 1d833b97 Florent Chuffart
							}
1710 1d833b97 Florent Chuffart
						}
1711 1d833b97 Florent Chuffart
						NEED_RERUN = TRUE
1712 1d833b97 Florent Chuffart
						break
1713 1d833b97 Florent Chuffart
					}
1714 1d833b97 Florent Chuffart
				}
1715 1d833b97 Florent Chuffart
				if (NEED_RERUN) {
1716 1d833b97 Florent Chuffart
					to_be_check_rois = to_be_check_again
1717 1d833b97 Florent Chuffart
					break
1718 1d833b97 Florent Chuffart
				}
1719 1d833b97 Florent Chuffart
			}
1720 1d833b97 Florent Chuffart
		}
1721 1d833b97 Florent Chuffart
		
1722 1d833b97 Florent Chuffart
		checked_rois = to_be_check_rois
1723 1d833b97 Florent Chuffart
		for (checked_roi_index in 1:length(checked_rois[,1])) {
1724 1d833b97 Florent Chuffart
			rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])	
1725 1d833b97 Florent Chuffart
		}
1726 1d833b97 Florent Chuffart
	}
1727 1d833b97 Florent Chuffart
1728 1d833b97 Florent Chuffart
	print(length(rois_3rd_round[,1]))
1729 1d833b97 Florent Chuffart
	print(sum(rois_3rd_round$length))
1730 1d833b97 Florent Chuffart
	rois = rois_3rd_round
1731 1d833b97 Florent Chuffart
	
1732 1d833b97 Florent Chuffart
	
1733 1d833b97 Florent Chuffart
	if (plot) {
1734 1d833b97 Florent Chuffart
		print(paste(length(rois$chr), "area of interest."))
1735 1d833b97 Florent Chuffart
	  # Plot rois
1736 8e9facd8 Florent Chuffart
	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[["BY"]], "fasta")   
1737 1d833b97 Florent Chuffart
		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1738 1d833b97 Florent Chuffart
		for (name in names(genome)) {
1739 1d833b97 Florent Chuffart
			if (TRUE) {
1740 1d833b97 Florent Chuffart
				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
1741 1d833b97 Florent Chuffart
			} else {			
1742 1d833b97 Florent Chuffart
				chr_ref = name
1743 1d833b97 Florent Chuffart
			}
1744 1d833b97 Florent Chuffart
			y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
1745 1d833b97 Florent Chuffart
			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1746 1d833b97 Florent Chuffart
			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1747 1d833b97 Florent Chuffart
		}
1748 1d833b97 Florent Chuffart
	  col=1
1749 1d833b97 Florent Chuffart
	  for (roi_index in 1:length(rois$chr)) {
1750 1d833b97 Florent Chuffart
			roi = rois[roi_index,]	
1751 1d833b97 Florent Chuffart
			y_lev = as.integer(roi$chr) + 0.3
1752 1d833b97 Florent Chuffart
			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1753 1d833b97 Florent Chuffart
			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1754 1d833b97 Florent Chuffart
	  	col = col + 1
1755 1d833b97 Florent Chuffart
	  }	
1756 1d833b97 Florent Chuffart
	}
1757 1d833b97 Florent Chuffart
	return (rois)
1758 1d833b97 Florent Chuffart
}
1759 1d833b97 Florent Chuffart
1760 1d833b97 Florent Chuffart
1761 1d833b97 Florent Chuffart
1762 1d833b97 Florent Chuffart
1763 61914235 Florent Chuffart
build_replicates = structure(function(# Stage replicates data 
1764 61914235 Florent Chuffart
### This function loads in memory data corresponding to the given experiments.
1765 61914235 Florent Chuffart
expe, ##<< a list of vector corresponding to vector of replicates.
1766 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1767 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1768 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1769 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1770 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1771 61914235 Florent Chuffart
) {
1772 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1773 61914235 Florent Chuffart
  	samples=list()
1774 61914235 Florent Chuffart
  	for (i in samples_ids) {
1775 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1776 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1777 8e9facd8 Florent Chuffart
      sample$roi = translate_roi(roi, sample$strain, config = config)
1778 61914235 Florent Chuffart
  		if (get_genome) {
1779 61914235 Florent Chuffart
  			# Get Genome
1780 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1781 61914235 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
1782 61914235 Florent Chuffart
  		}
1783 61914235 Florent Chuffart
  		# Get inputs
1784 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1785 61914235 Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE) 
1786 61914235 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4]) 	
1787 61914235 Florent Chuffart
  		if (!only_fetch) {
1788 61914235 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1789 61914235 Florent Chuffart
  	  }
1790 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1791 61914235 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
1792 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1793 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1794 61914235 Florent Chuffart
  			if (!only_fetch) {
1795 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1796 61914235 Florent Chuffart
    		}
1797 61914235 Florent Chuffart
  		}
1798 61914235 Florent Chuffart
  		samples[[length(samples) + 1]] = sample		
1799 61914235 Florent Chuffart
  	}
1800 61914235 Florent Chuffart
  	return(samples)
1801 61914235 Florent Chuffart
  }
1802 61914235 Florent Chuffart
	replicates = list()
1803 61914235 Florent Chuffart
	for(samples_ids in expe) {
1804 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1805 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1806 61914235 Florent Chuffart
	}
1807 61914235 Florent Chuffart
	return(replicates)
1808 61914235 Florent Chuffart
  }, ex = function() {
1809 61914235 Florent Chuffart
    # library(rjson)
1810 61914235 Florent Chuffart
    # library(nucleominer)
1811 61914235 Florent Chuffart
    # 
1812 61914235 Florent Chuffart
    # # Read config file
1813 61914235 Florent Chuffart
    # json_conf_file = "nucleo_miner_config.json"
1814 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1815 61914235 Florent Chuffart
    # # Read sample file
1816 61914235 Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)  
1817 61914235 Florent Chuffart
    # # here are the sample ids in a list
1818 61914235 Florent Chuffart
    # expes = list(c(1))
1819 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1820 61914235 Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY") 
1821 61914235 Florent Chuffart
    # # it displays the corverage
1822 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1823 61914235 Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH, 
1824 61914235 Florent Chuffart
    #       plot_coverage = TRUE,  
1825 61914235 Florent Chuffart
    #       plot_squared_reads = FALSE,  
1826 61914235 Florent Chuffart
    #       plot_ref_genome = FALSE, 
1827 61914235 Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,  
1828 61914235 Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,  
1829 61914235 Florent Chuffart
    #       plot_gaussian_reads = FALSE,  
1830 61914235 Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,  
1831 61914235 Florent Chuffart
    #       plot_ellipse_nucs = FALSE,  
1832 61914235 Florent Chuffart
    #       plot_wp_nucs = FALSE,  
1833 61914235 Florent Chuffart
    #       plot_wp_nuc_model = FALSE,  
1834 61914235 Florent Chuffart
    #       plot_common_nucs = FALSE,  
1835 61914235 Florent Chuffart
    #       height = 50)
1836 61914235 Florent Chuffart
  })
1837 61914235 Florent Chuffart
1838 61914235 Florent Chuffart
1839 61914235 Florent Chuffart
1840 61914235 Florent Chuffart
1841 61914235 Florent Chuffart
1842 61914235 Florent Chuffart
1843 61914235 Florent Chuffart
1844 61914235 Florent Chuffart
1845 61914235 Florent Chuffart
1846 61914235 Florent Chuffart
1847 61914235 Florent Chuffart
1848 61914235 Florent Chuffart
1849 61914235 Florent Chuffart
1850 61914235 Florent Chuffart
1851 61914235 Florent Chuffart
1852 61914235 Florent Chuffart
1853 61914235 Florent Chuffart
1854 61914235 Florent Chuffart
1855 61914235 Florent Chuffart
1856 61914235 Florent Chuffart
1857 61914235 Florent Chuffart
1858 61914235 Florent Chuffart
1859 61914235 Florent Chuffart
1860 61914235 Florent Chuffart
1861 61914235 Florent Chuffart
1862 61914235 Florent Chuffart
1863 61914235 Florent Chuffart
1864 61914235 Florent Chuffart
1865 61914235 Florent Chuffart
1866 61914235 Florent Chuffart
1867 61914235 Florent Chuffart
1868 61914235 Florent Chuffart
1869 61914235 Florent Chuffart
1870 1d833b97 Florent Chuffart
1871 1d833b97 Florent Chuffart
1872 1d833b97 Florent Chuffart
1873 1d833b97 Florent Chuffart
1874 1d833b97 Florent Chuffart
1875 1d833b97 Florent Chuffart
1876 1d833b97 Florent Chuffart
1877 1d833b97 Florent Chuffart
1878 1d833b97 Florent Chuffart
1879 1d833b97 Florent Chuffart
1880 1d833b97 Florent Chuffart
1881 1d833b97 Florent Chuffart
1882 1d833b97 Florent Chuffart
1883 1d833b97 Florent Chuffart
1884 1d833b97 Florent Chuffart
1885 1d833b97 Florent Chuffart
1886 1d833b97 Florent Chuffart
1887 1d833b97 Florent Chuffart
1888 1d833b97 Florent Chuffart
1889 1d833b97 Florent Chuffart
1890 1d833b97 Florent Chuffart
1891 1d833b97 Florent Chuffart
1892 1d833b97 Florent Chuffart
1893 1d833b97 Florent Chuffart
1894 1d833b97 Florent Chuffart
1895 1d833b97 Florent Chuffart
1896 1d833b97 Florent Chuffart
1897 1d833b97 Florent Chuffart
1898 1d833b97 Florent Chuffart
1899 1d833b97 Florent Chuffart
1900 1d833b97 Florent Chuffart
1901 1d833b97 Florent Chuffart
1902 1d833b97 Florent Chuffart
1903 1d833b97 Florent Chuffart
1904 6b5a528c Florent Chuffart
1905 6b5a528c Florent Chuffart
1906 6b5a528c Florent Chuffart
1907 6b5a528c Florent Chuffart
1908 6b5a528c Florent Chuffart
1909 6b5a528c Florent Chuffart
1910 6b5a528c Florent Chuffart
1911 6b5a528c Florent Chuffart
1912 6b5a528c Florent Chuffart
1913 6b5a528c Florent Chuffart
1914 6b5a528c Florent Chuffart
1915 6b5a528c Florent Chuffart
1916 6b5a528c Florent Chuffart
1917 6b5a528c Florent Chuffart
1918 6b5a528c Florent Chuffart
1919 6b5a528c Florent Chuffart
1920 6b5a528c Florent Chuffart
1921 6b5a528c Florent Chuffart
1922 6b5a528c Florent Chuffart
1923 6b5a528c Florent Chuffart
1924 6b5a528c Florent Chuffart
1925 6b5a528c Florent Chuffart
1926 6b5a528c Florent Chuffart
1927 6b5a528c Florent Chuffart
1928 6b5a528c Florent Chuffart
1929 6b5a528c Florent Chuffart
1930 6b5a528c Florent Chuffart
1931 6b5a528c Florent Chuffart
1932 6b5a528c Florent Chuffart
1933 6b5a528c Florent Chuffart
1934 6b5a528c Florent Chuffart
1935 6b5a528c Florent Chuffart
1936 6b5a528c Florent Chuffart
1937 6b5a528c Florent Chuffart
1938 6b5a528c Florent Chuffart
perform_anovas = function(# Performaing ANOVAs
1939 6b5a528c Florent Chuffart
### Counts reads and Performs ANOVAS for each common nucleosomes involved.
1940 6b5a528c Florent Chuffart
replicates, ##<< Set of replicates, each replicate is a list of samples (ideally 3). Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}. In the \emph{perform_anovas} contexte, we need 4 replicates (4 * (3 samples)): 2 strains * (1 marker + 1 input (Mnase_Seq)). 
1941 6b5a528c Florent Chuffart
aligned_inter_strain_nucs, ##<< List of common nucleosomes.
1942 6b5a528c Florent Chuffart
inputs_name="Mnase_Seq", ##<< Name of the input.
1943 6b5a528c Florent Chuffart
plot_anova_boxes=FALSE ##<< Plot (or not) boxplot for each nuc.
1944 6b5a528c Florent Chuffart
) {
1945 6b5a528c Florent Chuffart
	anova_results = NULL
1946 6b5a528c Florent Chuffart
	for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
1947 6b5a528c Florent Chuffart
		inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
1948 6b5a528c Florent Chuffart
	  # counting reads
1949 6b5a528c Florent Chuffart
		my_data = NULL
1950 6b5a528c Florent Chuffart
		for (replicate_rank in 1:length(replicates)) {
1951 6b5a528c Florent Chuffart
			samples = replicates[[replicate_rank]]
1952 6b5a528c Florent Chuffart
			strain = samples[[1]]$strain
1953 6b5a528c Florent Chuffart
			marker = samples[[1]]$marker
1954 6b5a528c Florent Chuffart
			for (sample_rank in 1:length(samples)) {
1955 6b5a528c Florent Chuffart
				sample = samples[[sample_rank]]
1956 6b5a528c Florent Chuffart
				nuc_width = as.integer(inter_strain_nuc[[paste("upper_bound_",strain,sep="")]]) - inter_strain_nuc[[paste("lower_bound_",strain,sep="")]]
1957 6b5a528c Florent Chuffart
				nuc_reads = filter_tf_inputs(sample$inputs, inter_strain_nuc[[paste("chr_",strain,sep="")]], inter_strain_nuc[[paste("lower_bound_",strain,sep="")]], inter_strain_nuc[[paste("upper_bound_",strain,sep="")]], nuc_width - 1)
1958 6b5a528c Florent Chuffart
				indic = sum(nuc_reads[,4]) * 1000000/sample$total_reads / nuc_width 
1959 6b5a528c Florent Chuffart
				my_data = dfadd(my_data, list(strain = strain, marker = marker, indic = indic))
1960 6b5a528c Florent Chuffart
			}
1961 6b5a528c Florent Chuffart
		}
1962 6b5a528c Florent Chuffart
		my_data$strain = as.factor(my_data$strain)
1963 6b5a528c Florent Chuffart
		my_data$marker = as.factor(my_data$marker)
1964 6b5a528c Florent Chuffart
	
1965 6b5a528c Florent Chuffart
		strain_ref1 = replicates[[1]][[1]]$strain
1966 6b5a528c Florent Chuffart
		strain_ref2 = replicates[[2]][[1]]$strain 
1967 6b5a528c Florent Chuffart
		marker = replicates[[length(replicates)]][[1]]$marker				
1968 6b5a528c Florent Chuffart
1969 6b5a528c Florent Chuffart
	  # Collecting anova results
1970 6b5a528c Florent Chuffart
	  anova_result = list()
1971 6b5a528c Florent Chuffart
		# nucs info
1972 6b5a528c Florent Chuffart
		for (name in names(inter_strain_nuc)) {
1973 6b5a528c Florent Chuffart
			anova_result[[name]] = inter_strain_nuc[[name]]
1974 6b5a528c Florent Chuffart
		}
1975 6b5a528c Florent Chuffart
	  #
1976 6b5a528c Florent Chuffart
		for (strain in c(strain_ref1, strain_ref2)) {
1977 6b5a528c Florent Chuffart
			for (manip in c(inputs_name, marker)) {
1978 6b5a528c Florent Chuffart
				replicat = 1
1979 6b5a528c Florent Chuffart
				for(indic in my_data[my_data$strain == strain & my_data$marker == manip,]$indic) {
1980 6b5a528c Florent Chuffart
					anova_result[[paste(strain, manip, replicat, sep="_")]] = indic
1981 6b5a528c Florent Chuffart
					replicat = replicat + 1
1982 6b5a528c Florent Chuffart
				}			
1983 6b5a528c Florent Chuffart
			}	
1984 6b5a528c Florent Chuffart
		}
1985 6b5a528c Florent Chuffart
1986 6b5a528c Florent Chuffart
	  # Compute ANOVAs
1987 6b5a528c Florent Chuffart
		mnase_data = my_data[my_data$marker == inputs_name,]
1988 6b5a528c Florent Chuffart
		mnase_aov = aov(indic~strain,mnase_data)
1989 6b5a528c Florent Chuffart
		mnase_effects = model.tables(mnase_aov)$tables
1990 6b5a528c Florent Chuffart
		mnase_pvalues = summary(mnase_aov)[[1]]$Pr
1991 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,mnase_data)
1992 6b5a528c Florent Chuffart
		anova_result[["mnase_st"]] = mnase_effects$strain[1]
1993 6b5a528c Florent Chuffart
		anova_result[["mnase_st_pvalue"]] = mnase_pvalues[1]	
1994 6b5a528c Florent Chuffart
		# 
1995 6b5a528c Florent Chuffart
		marker_data = my_data[my_data$marker == marker,]
1996 6b5a528c Florent Chuffart
		marker_aov = aov(indic~strain,marker_data)
1997 6b5a528c Florent Chuffart
		marker_effects = model.tables(marker_aov)$tables
1998 6b5a528c Florent Chuffart
		marker_pvalues = summary(marker_aov)[[1]]$Pr
1999 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,marker_data)
2000 6b5a528c Florent Chuffart
		anova_result[["marker_st"]] = marker_effects$strain[1]
2001 6b5a528c Florent Chuffart
		anova_result[["marker_st_pvalue"]] = marker_pvalues[1]
2002 6b5a528c Florent Chuffart
		# 
2003 6b5a528c Florent Chuffart
		st1_data = my_data[my_data$strain == strain_ref1,]
2004 6b5a528c Florent Chuffart
		st1_aov = aov(indic~marker,st1_data)
2005 6b5a528c Florent Chuffart
		st1_effects = model.tables(st1_aov)$tables
2006 6b5a528c Florent Chuffart
		st1_pvalues = summary(st1_aov)[[1]]$Pr
2007 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,st1_data)
2008 6b5a528c Florent Chuffart
		anova_result[["st1_ma"]]=st1_effects$ma[inputs_name]
2009 6b5a528c Florent Chuffart
		anova_result[["st1_ma_pvalue"]]=st1_pvalues[1]
2010 6b5a528c Florent Chuffart
		# 
2011 6b5a528c Florent Chuffart
		st2_data = my_data[my_data$strain == strain_ref2,]
2012 6b5a528c Florent Chuffart
		st2_aov = aov(indic~marker,st2_data)
2013 6b5a528c Florent Chuffart
		st2_effects = model.tables(st2_aov)$tables
2014 6b5a528c Florent Chuffart
		st2_pvalues = summary(st2_aov)[[1]]$Pr
2015 6b5a528c Florent Chuffart
		# boxplot(indic~ma*st,st2_data)
2016 6b5a528c Florent Chuffart
		anova_result[["st2_ma"]]=st2_effects$ma[inputs_name]
2017 6b5a528c Florent Chuffart
		anova_result[["st2_ma_pvalue"]]=st2_pvalues[1]
2018 6b5a528c Florent Chuffart
		# 
2019 6b5a528c Florent Chuffart
		correl_data = my_data
2020 6b5a528c Florent Chuffart
		correl_aov = aov(indic~strain*marker,correl_data)
2021 6b5a528c Florent Chuffart
		correl_effects = model.tables(correl_aov)$tables
2022 6b5a528c Florent Chuffart
		correl_pvalues = summary(correl_aov)[[1]]$Pr
2023 6b5a528c Florent Chuffart
		if (plot_anova_boxes) {
2024 6b5a528c Florent Chuffart
			x11()
2025 6b5a528c Florent Chuffart
			boxplot(indic~marker*strain,correl_data)
2026 6b5a528c Florent Chuffart
		}
2027 6b5a528c Florent Chuffart
		anova_result[["correl_st"]]=correl_effects$strain[1]
2028 6b5a528c Florent Chuffart
		anova_result[["correl_ma"]]=correl_effects$ma[inputs_name]
2029 6b5a528c Florent Chuffart
		anova_result[["correl_st_ma"]]=correl_effects$"strain:marker"[1,1]
2030 6b5a528c Florent Chuffart
		anova_result[["correl_st_pvalue"]]=correl_pvalues[1]
2031 6b5a528c Florent Chuffart
		anova_result[["correl_ma_pvalue"]]=correl_pvalues[2]
2032 6b5a528c Florent Chuffart
		anova_result[["correl_st_ma_pvalue"]]=correl_pvalues[3]
2033 6b5a528c Florent Chuffart
		
2034 6b5a528c Florent Chuffart
		anova_results = dfadd(anova_results, anova_result)
2035 6b5a528c Florent Chuffart
	}
2036 6b5a528c Florent Chuffart
	return(anova_results)
2037 6b5a528c Florent Chuffart
### Returns ANOVA results and comunted reads.
2038 6b5a528c Florent Chuffart
}
2039 6b5a528c Florent Chuffart
2040 6b5a528c Florent Chuffart
2041 6b5a528c Florent Chuffart
watch_samples = function(# Watching analysis of samples 
2042 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
2043 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
2044 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
2045 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
2046 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
2047 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
2048 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
2049 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
2050 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
2051 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
2052 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
2053 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
2054 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
2055 6b5a528c Florent Chuffart
plot_common_nucs = TRUE,  ##<< Plot (or not) aligned reads.
2056 6b5a528c Florent Chuffart
plot_anovas = FALSE,  ##<< Plot (or not) scatter for each nuc.
2057 6b5a528c Florent Chuffart
plot_anova_boxes =  FALSE,  ##<< Plot (or not) boxplot for each nuc.
2058 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
2059 6b5a528c Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. 
2060 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
2061 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
2062 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
2063 6b5a528c Florent Chuffart
){
2064 6b5a528c Florent Chuffart
  returned_list = list()
2065 6b5a528c Florent Chuffart
  # Computing global display parameters
2066 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
2067 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
2068 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
2069 6b5a528c Florent Chuffart
  } else {
2070 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
2071 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
2072 6b5a528c Florent Chuffart
  }
2073 6b5a528c Florent Chuffart
	base_glo = 0
2074 6b5a528c Florent Chuffart
	nb_rank_glo = 0
2075 6b5a528c Florent Chuffart
  for (samples in replicates) {
2076 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
2077 6b5a528c Florent Chuffart
  }
2078 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
2079 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
2080 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
2081 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
2082 6b5a528c Florent Chuffart
  # Plot main frame
2083 935a568c Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab="#reads (per million reads)", xlab=paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr), main="NucleoMiner2" )
2084 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
2085 6b5a528c Florent Chuffart
  # Go
2086 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
2087 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
2088 6b5a528c Florent Chuffart
		# Computing replicate parameters
2089 6b5a528c Florent Chuffart
		nb_rank = length(samples)
2090 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
2091 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
2092 6b5a528c Florent Chuffart
		y_min = min(ylim)
2093 6b5a528c Florent Chuffart
		y_max = max(ylim)
2094 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
2095 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
2096 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
2097 6b5a528c Florent Chuffart
			# computing sample parameters
2098 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
2099 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
2100 6b5a528c Florent Chuffart
			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
2101 6b5a528c Florent Chuffart
2102 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2103 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
2104 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
2105 6b5a528c Florent Chuffart
		  } else {
2106 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
2107 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
2108 6b5a528c Florent Chuffart
		  }		
2109 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
2110 6b5a528c Florent Chuffart
	    # Plot Genome seq
2111 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
2112 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
2113 6b5a528c Florent Chuffart
		  }
2114 6b5a528c Florent Chuffart
			# Plot reads
2115 6b5a528c Florent Chuffart
			reads = sample$inputs
2116 6b5a528c Florent Chuffart
			signs = sign_from_strand(reads[,3])	
2117 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
2118 6b5a528c Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, 
2119 6b5a528c Florent Chuffart
				col=1, length=0.15/nb_rank)          	
2120 6b5a528c Florent Chuffart
			}
2121 6b5a528c Florent Chuffart
	    if (plot_squared_reads) {    	
2122 6b5a528c Florent Chuffart
        # require(plotrix)
2123 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
2124 6b5a528c Florent Chuffart
			}
2125 6b5a528c Florent Chuffart
	    if (plot_coverage) {    	
2126 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
2127 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
2128 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
2129 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
2130 a52f85d7 Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)          
2131 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
2132 a52f85d7 Florent Chuffart
          for (i in 1:length(step_h)) {          
2133 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
2134 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
2135 a52f85d7 Florent Chuffart
          }
2136 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
2137 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
2138 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
2139 a52f85d7 Florent Chuffart
          tmp_current_level = 0
2140 a52f85d7 Florent Chuffart
          for (i in 1:length(steps_y)) {          
2141 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
2142 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
2143 a52f85d7 Florent Chuffart
          }
2144 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
2145 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
2146 a52f85d7 Florent Chuffart
        } else {
2147 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
2148 a52f85d7 Florent Chuffart
          steps_x = c(sample$roi$begin, sample$roi$end)
2149 6b5a528c Florent Chuffart
        }
2150 8e9facd8 Florent Chuffart
        print(steps_x)
2151 8e9facd8 Florent Chuffart
        print(steps_y)
2152 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
2153 6b5a528c Florent Chuffart
        abline(y_lev,0)
2154 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
2155 6b5a528c Florent Chuffart
			}
2156 6b5a528c Florent Chuffart
			# Plot nucs
2157 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
2158 6b5a528c Florent Chuffart
				nucs = sample$outputs
2159 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
2160 6b5a528c Florent Chuffart
					col = 1
2161 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
2162 6b5a528c Florent Chuffart
						col = col + 1
2163 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
2164 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
2165 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
2166 6b5a528c Florent Chuffart
						total_involved_reads = sum(involved_reads[,4]) 
2167 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
2168 6b5a528c Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, 
2169 6b5a528c Florent Chuffart
							col=col, length=0.15/nb_rank)          	
2170 6b5a528c Florent Chuffart
						}
2171 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
2172 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
2173 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2174 6b5a528c Florent Chuffart
		  			}
2175 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
2176 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
2177 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2178 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
2179 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
2180 6b5a528c Florent Chuffart
	          }
2181 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
2182 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=2)
2183 6b5a528c Florent Chuffart
	          }
2184 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
2185 6b5a528c Florent Chuffart
				      # require(plotrix)
2186 883439d6 Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col) 
2187 6b5a528c Florent Chuffart
						}
2188 6b5a528c Florent Chuffart
		      }
2189 6b5a528c Florent Chuffart
		    } else {
2190 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
2191 6b5a528c Florent Chuffart
		    }
2192 6b5a528c Florent Chuffart
			}
2193 6b5a528c Florent Chuffart
	  }
2194 6b5a528c Florent Chuffart
	  # Plot wp nucs
2195 883439d6 Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs)) {
2196 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
2197 6b5a528c Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
2198 6b5a528c Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]			
2199 6b5a528c Florent Chuffart
				} else {
2200 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2201 6b5a528c Florent Chuffart
				}
2202 6b5a528c Florent Chuffart
		  } else {
2203 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2204 6b5a528c Florent Chuffart
		  }
2205 6b5a528c Florent Chuffart
			replicates_wp_nucs[[replicate_rank]] = wp_nucs
2206 6b5a528c Florent Chuffart
			for (wp_nuc in wp_nucs) {
2207 6b5a528c Florent Chuffart
				if (wp_nuc$wp){
2208 6b5a528c Florent Chuffart
					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2209 6b5a528c Florent Chuffart
					all_original_reads = c()
2210 6b5a528c Florent Chuffart
					for(initial_nuc in wp_nuc$nucs) {
2211 6b5a528c Florent Chuffart
						all_original_reads = c(all_original_reads, initial_nuc$original_reads)						
2212 6b5a528c Florent Chuffart
					}
2213 6b5a528c Florent Chuffart
					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2214 6b5a528c Florent Chuffart
					if (FALSE) {
2215 6b5a528c Florent Chuffart
					  rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
2216 6b5a528c Florent Chuffart
				  }
2217 6b5a528c Florent Chuffart
					if (plot_wp_nuc_model) {
2218 883439d6 Florent Chuffart
					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)				
2219 6b5a528c Florent Chuffart
				  }
2220 6b5a528c Florent Chuffart
				}
2221 6b5a528c Florent Chuffart
			}
2222 6b5a528c Florent Chuffart
		}
2223 6b5a528c Florent Chuffart
	}	
2224 6b5a528c Florent Chuffart
  
2225 6b5a528c Florent Chuffart
	if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
2226 6b5a528c Florent Chuffart
		if (is.null(aligned_inter_strain_nucs)) {
2227 1d833b97 Florent Chuffart
			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2228 6b5a528c Florent Chuffart
		}
2229 6b5a528c Florent Chuffart
		if (plot_common_nucs) {
2230 6b5a528c Florent Chuffart
      #Plot common wp nucs
2231 6b5a528c Florent Chuffart
      mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2232 6b5a528c Florent Chuffart
            for (replicate_rank in 1:length(replicates)) {
2233 6b5a528c Florent Chuffart
        nb_rank[[replicate_rank]] = length(samples)
2234 6b5a528c Florent Chuffart
        base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2235 6b5a528c Florent Chuffart
        ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2236 6b5a528c Florent Chuffart
        y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2237 6b5a528c Florent Chuffart
        y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2238 6b5a528c Florent Chuffart
        delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2239 6b5a528c Florent Chuffart
        mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2240 6b5a528c Florent Chuffart
      
2241 6b5a528c Florent Chuffart
        samples = replicates[[replicate_rank]]
2242 6b5a528c Florent Chuffart
        for (sample_rank in 1:length(samples)) {
2243 6b5a528c Florent Chuffart
          sample = samples[[sample_rank]]
2244 6b5a528c Florent Chuffart
          y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2245 6b5a528c Florent Chuffart
          if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2246 6b5a528c Florent Chuffart
            x_min[[replicate_rank]] = sample$roi[["begin"]]
2247 6b5a528c Florent Chuffart
            x_max[[replicate_rank]] = sample$roi[["end"]]
2248 6b5a528c Florent Chuffart
          } else {
2249 6b5a528c Florent Chuffart
            x_min[[replicate_rank]] = - sample$roi[["begin"]]
2250 6b5a528c Florent Chuffart
            x_max[[replicate_rank]] = - sample$roi[["end"]]
2251 6b5a528c Florent Chuffart
          }
2252 6b5a528c Florent Chuffart
          shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2253 6b5a528c Florent Chuffart
        }
2254 6b5a528c Florent Chuffart
      }  
2255 6b5a528c Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2256 6b5a528c Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2257 6b5a528c Florent Chuffart
        tmp_xs = tmp_ys = c()
2258 6b5a528c Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2259 6b5a528c Florent Chuffart
          samples = replicates[[replicate_rank]]
2260 6b5a528c Florent Chuffart
          strain = samples[[1]]$strain
2261 6b5a528c Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2262 6b5a528c Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2263 6b5a528c Florent Chuffart
        }
2264 6b5a528c Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)          
2265 6b5a528c Florent Chuffart
      }    
2266 6b5a528c Florent Chuffart
		}
2267 6b5a528c Florent Chuffart
		if (plot_anovas | plot_anova_boxes) {
2268 6b5a528c Florent Chuffart
			anova_results = perform_anovas(replicates, aligned_inter_strain_nucs, plot_anova_boxes=plot_anova_boxes)
2269 6b5a528c Florent Chuffart
			thres = FDR(anova_results[,"correl_st_ma_pvalue"],0.05)
2270 6b5a528c Florent Chuffart
			if (is.na(thres)) {
2271 6b5a528c Florent Chuffart
				# Boneferroni multiple test
2272 6b5a528c Florent Chuffart
				thres = 0.05/length(anova_results[,1])
2273 6b5a528c Florent Chuffart
			}
2274 6b5a528c Florent Chuffart
			filtred_anova_results = anova_results[anova_results[,"correl_st_ma_pvalue"]<thres,]
2275 6b5a528c Florent Chuffart
			x11()
2276 6b5a528c Florent Chuffart
			if (plot_anovas) {
2277 6b5a528c Florent Chuffart
			  plot(anova_results[,"mnase_st"],anova_results[,"correl_st_ma"], pch=1, xlim=c(-0.07,0.07), ylim=c(-0.07,0.07),main="SNEPs" ,xlab = "strain effect", ylab = "snep effect")
2278 6b5a528c Florent Chuffart
	      points(x=filtred_anova_results[,"mnase_st"],y=filtred_anova_results[,"correl_st_ma"],col=2, pch="+")
2279 6b5a528c Florent Chuffart
		  }
2280 6b5a528c Florent Chuffart
 	  }
2281 6b5a528c Florent Chuffart
	}
2282 6b5a528c Florent Chuffart
  return(returned_list)
2283 6b5a528c Florent Chuffart
}
2284 6b5a528c Florent Chuffart
2285 6b5a528c Florent Chuffart
2286 6b5a528c Florent Chuffart