Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ b20637ed

Historique | Voir | Annoter | Télécharger (88,15 ko)

1
# get_content = function(# Get content from cached 
2
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3
# obj, ##<< The object that we want its content.
4
# ... ##<< Parameters that will be passed to my_read
5
# ) {
6
#   UseMethod("get_content", obj)
7
#   ### Returns the cached content of the current object. 
8
# }
9
# 
10
# get_content.default = structure( function(# Get content from cached 
11
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12
# obj, ##<< The object that we want its content.
13
# ... ##<< Parameters that will be passed to my_read
14
# ) {
15
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16
#     NM_CACHE <<- list()
17
#   }
18
#   if (is.null(NM_CACHE[[obj$filename]])) {
19
#     print(paste("Loading file ",obj$filename, sep=""))
20
#     tmp_content = my_read(obj, ...)
21
#     print("affect it...")
22
#     NM_CACHE[[obj$filename]] <<- tmp_content
23
#     print("done.")
24
#   }  
25
#   return(NM_CACHE[[obj$filename]])
26
# ### Returns the cached content of the current object. 
27
# }, ex=function(){
28
#   # Create a dataframe
29
#   df = NULL
30
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32
#   # Dump it into tmp file
33
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34
#   # Load it into cache using feature.
35
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36
#   class(cached_content_object) = "table"  
37
#   # First time it will be load into cache
38
#   print(get_content(cached_content_object))
39
#   # Second time not
40
#   print(get_content(cached_content_object))
41
#   
42
# })
43
#   
44
# my_read = function(
45
# ### Abstract my_read function.  
46
#   obj, ...) {
47
#   UseMethod("my_read", obj)
48
# }
49
#   
50
# my_read.default = function(
51
# ### Default my_read function.  
52
#   obj, ...){
53
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))      
54
# }
55
# 
56
# my_read.fasta = function(
57
# ### my_read function for fasta files.  
58
#   obj, ...){
59
#   # require(seqinr)
60
#   return(read.fasta(obj$filename, ...))
61
# }
62
# 
63
# my_read.table = function(
64
# ### my_read function for table files.  
65
#   obj, ...){
66
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67
#     return(read.table(file=gzfile(obj$filename), ...))      
68
#   } else { 
69
#     return(read.table(file=obj$filename, ...))
70
#   }
71
# }  
72
# 
73
# my_read.cvs = function(
74
# ### my_read function for cvs files.  
75
#   obj, ...){
76
#   return(read.csv(file=obj$filename, ...))
77
# }  
78

    
79

    
80

    
81

    
82

    
83

    
84

    
85

    
86

    
87
FDR = structure(function#  False Discovery Rate
88
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89
##author<< Gael Yvert, 
90
(
91
x, ##<< A vector x of independent p-values.
92
FDR ##<< The specified FDR.
93
) {
94
  x <- sort(na.omit(x))
95
  N = length(x)
96
  i = 1;
97
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98
  if (i == 1)
99
    return (NA)
100
  else
101
    return( x[i-1] )
102
### Return the the corresponding cutoff.
103
}, ex=function(){
104
  print("example")
105
})
106

    
107

    
108

    
109
lod_score_vecs = structure(function # Likelihood ratio
110
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111
(
112
x ,##<< First vector.
113
y ##<< Second vector.
114
) {
115
	if (length(x) <=1 | length(y) <= 1) {
116
		return(NA)
117
	}
118
  meanX = mean(x)
119
  sdX = sd(x)
120
  meanY = mean(y)
121
  sdY = sd(y)
122
  meanXY = mean(c(x,y))
123
  sdXY = sd(c(x,y))
124
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127
  ratio = -(llX + llY - llXY)
128
  return(ratio)
129
### Returns the likelihood ratio.
130
}, ex=function(){
131
  # LOD score for 2 set of values
132
  mean1=5; sd1=2; card2 = 250
133
  mean2=6; sd2=3; card1 = 200
134
  x1 = rnorm(card1, mean1, sd1)
135
  x2 = rnorm(card2, mean2, sd2)  
136
  min = floor(min(c(x1,x2)))
137
  max = ceiling(max(c(x1,x2)))
138
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142
  lod_score_vecs(x1,x2)
143
 })
144

    
145
dfadd = structure(function# Adding list to a dataframe.
146
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147
	(df, ##<<  A dataframe
148
		l ##<<  A list
149
	) {
150
  if (is.null(df)) {
151
    df = data.frame(l,stringsAsFactors=FALSE)
152
  } else {
153
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154
  }
155
  return(df)
156
### Return the dataframe \emph{df}.
157
}, ex=function(){
158
		## Here dataframe is NULL
159
		print(df)
160
		df = NULL
161
		
162
		# Initialize df
163
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164
		print(df)
165
		
166
		# Adding elements to df
167
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168
		print(df)
169
})
170

    
171

    
172
sign_from_strand = function(
173
### Get the sign of strand 
174
strands) {
175
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176
### If strand in forward then returns 1 else returns -1
177
}
178

    
179
flat_reads = function(
180
### Extract reads coordinates from TempleteFilter input sequence
181
reads, ##<< TemplateFilter input reads 
182
nuc_width ##<< Width used to shift F and R reads.
183
) {
184
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189
}
190

    
191

    
192

    
193
filter_tf_outputs = function(# Filter TemplateFilter outputs
194
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195
tf_outputs, ##<< TemplateFilter outputs.
196
chr, ##<< Chromosome observed, here chr is an integer.
197
x_min, ##<< Coordinate of the first bp observed.
198
x_max, ##<< Coordinate of the last bp observed.
199
nuc_width = 160, ##<< Nucleosome width.
200
ol_bp = 59, ##<< Overlap Threshold.
201
corr_thres = 0.5 ##<< Correlation threshold.
202
) {
203
  if (x_min < 0) {
204
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (-x_max - nuc_width/2) & tf_outputs$center <  (-x_min + nuc_width/2),]
205
	} else {
206
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (x_min - nuc_width/2) & tf_outputs$center < (x_max + nuc_width/2),]
207
  }
208
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]  
211
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]  
212
  i = 1
213
  while (i <= length(tf_outputs[,1])) {    
214
    lb = tf_outputs[i,]$low 
215
    ub = tf_outputs[i,]$up
216
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217
    i = i+1
218
  }
219
  return(tf_outputs)
220
### Returns filtered TemplateFilter Outputs
221
}
222

    
223

    
224

    
225
filter_tf_inputs = function(# Filter TemplateFilter inputs
226
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227
inputs, ##<< TF inputs to be filtered.
228
chr, ##<< Chromosome observed, here chr is an integer.
229
x_min, ##<< Coordinate of the first bp observed.
230
x_max, ##<< Coordinate of the last bp observed.
231
nuc_width = 160, ##<< Nucleosome width.
232
only_f = FALSE, ##<< Filter only F reads.
233
only_r = FALSE ##<< Filter only R reads.
234
) {
235
	if (only_f) {
236
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
237
	} else if (only_r) {
238
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
239
	} else {
240
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
241
	}
242
	corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
243
  inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
244
	return(inputs)
245
### Returns filtred inputs.
246
}
247

    
248

    
249

    
250
get_comp_strand = function(
251
### Compute the complementatry strand.
252
strand ##<< The original strand.
253
) {
254
	apply(t(strand),2, function(n){
255
	  if (n=="a") {return("t")} 
256
		if (n=="t") {return("a")}
257
		if (n=="c") {return("g")} 
258
		if (n=="g") {return("c")} 
259
	})
260
### Returns the complementatry strand.
261
}
262

    
263

    
264
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. 
265
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
266
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
267
lod_thres=-20, ##<< Log likelihood ration threshold.
268
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
269
){
270
	end_of_tracks = function(tracks) {
271
		if (length(tracks) == 0) {
272
			return(TRUE)
273
		}
274
	  for (lower_bound in tracks) {
275
			if(!is.na(lower_bound)) {
276
	      if (lower_bound < coord_max) {
277
	        return(FALSE)
278
	      }
279
	  	}
280
	  }
281
	  return(TRUE)
282
	}
283
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
284
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
285
			new_cluster$wp = TRUE
286
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
287
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
288
		  	clusters[[length(clusters) + 1]] = new_cluster
289
				# print(new_cluster)
290
		  }
291
		} else {
292
			new_cluster$wp = FALSE
293
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
294
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
295
			  clusters[[length(clusters) + 1]] = new_cluster
296
			}
297
		}
298
		return(clusters)
299
	}
300
	strain = samples[[1]]$strain
301
	lod_scores = c()
302
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
303
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
304
  # compute clusters
305
  clusters = list()
306
  cluster_contents = list()
307
  # Init reader
308
  indexes = c()
309
  track_readers = c()
310
  current_nuc = NULL
311
	lod_score = lod_thres - 1 
312
  # Read nucs from TF outputs  
313
  tf_outs = list()
314
	i = 1
315
  for (sample in samples) {
316
		# print(sample$roi$chr)
317
		# print(min_nuc_center)
318
		# print(max_nuc_center)
319
		# print(sample$outputs)
320
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
321
		# print(tf_outs[[i]])
322
		tf_outs[[i]] = sample$outputs
323
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]  
324
    indexes[i] = 1
325
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
326
      track_readers[i] = coord_max				
327
	  } else {
328
      track_readers[i] = tf_outs[[i]][indexes[i],]$center				
329
		}
330
		i = i+1		
331
  }
332
	# print(track_readers)
333
  new_cluster = NULL
334
  nb_nucs_in_cluster = 0
335
  nuc_from_track = c()
336
  for (i in 1:length(tf_outs)){
337
    nuc_from_track[i] = FALSE
338
  }
339
  # Start clustering
340
  while (!end_of_tracks(track_readers)) {
341
    new_center = min(track_readers)
342
		current_track = which(track_readers == new_center)[1]			
343
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
344
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
345
		new_nuc$inputs = samples[[current_track]]$inputs
346
		new_nuc$chr = samples[[current_track]]$roi$chr
347
		new_nuc$track = current_track
348
		
349
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width) 
350
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
351
		new_nuc$original_reads = flatted_reads[[3]]
352

    
353
    new_upper_bound = new_nuc$upper_bound
354

    
355
    if (!is.null(current_nuc)) {
356
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
357
			lod_scores = c(lod_scores,lod_score)
358
		}
359
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
360
		if (is.na(lod_score)) {
361
			lod_score = lod_thres - 1 
362
		}
363
	  if (lod_score > lod_thres) {
364
			# Store lod_score
365
			new_nuc$lod_score = lod_score 			
366
      # aggregate to current cluster
367
      #   update bound
368
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
369
        new_cluster$upper_bound = new_nuc$upper_bound      
370
      }
371
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
372
        new_cluster$lower_bound = new_nuc$lower_bound         
373
      }
374
      #   add nucleosome to current cluster
375
      nuc_from_track[current_track] = TRUE
376
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
377
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
378
    } else {
379
			if (!is.null(new_cluster)) {
380
        # store old cluster
381
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
382
			}
383
      # Reinit current cluster composition stuff
384
      nb_nucs_in_cluster = 0
385
      nuc_from_track = c()
386
      for (i in 1:length(tf_outs)){
387
        nuc_from_track[i] = FALSE
388
      }
389
      # create new cluster        
390
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
391
      # update upper bound
392
      current_upper_bound = new_upper_bound
393
      # add nucleosome to current cluster
394
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1 
395
      nuc_from_track[current_track] = TRUE
396
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
397
				
398
		}
399
			
400
		current_nuc = new_nuc
401

    
402
    # update indexes
403
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {      
404
      indexes[current_track] = indexes[current_track] + 1
405
      # update track
406
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
407
    } else {
408
      # update track
409
      track_readers[current_track] = coord_max
410
    }
411
  }
412
  # store last cluster
413
  if (!is.null(new_cluster)) {
414
    # store old cluster
415
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
416
  }
417
	return(list(clusters, lod_scores))
418
### Returns a list of clusterized nucleosomes, and all computed lod scores.
419
}, ex=function(){	
420
	# Dealing with a region of interest
421
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
422
	samples = list()
423
	for (i in 1:3) {
424
		# Create TF output
425
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
426
		outputs = dfadd(NULL,tf_nuc)
427
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
428
		# Generate corresponding reads
429
		nb_reads = round(runif(1,170,230))
430
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
431
		u_reads = sort(unique(reads))
432
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
433
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
434
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
435
		u_reads = u_reads + shifts
436
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
437
		                         "V2" = u_reads, 
438
														 "V3" = strands, 
439
														 "V4" = counts), stringsAsFactors=FALSE)
440
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
441
	}
442
	print(aggregate_intra_strain_nucs(samples))
443
})
444

    
445

    
446

    
447

    
448

    
449

    
450
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
451
### This function aligns nucs between two strains for a given genome region.
452
replicates, ##<< Set of replicates, ideally 3 per strain. 
453
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
454
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
455
corr_thres=0.5, ##<< Correlation threshold.
456
lod_thres=-100, ##<< LOD cut off.
457
config=NULL, ##<< GLOBAL config variable
458
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.	
459
) {
460
	if (length(replicates) < 2) {
461
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
462
	} else if (length(replicates) > 2) {
463
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")			
464
	}
465
	common_nuc = NULL
466
	lod_scores = c()
467
	chr = replicates[[1]][[1]]$roi$chr
468
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
469
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
470
	strain_ref1 = replicates[[1]][[1]]$strain
471
	strain_ref2 = replicates[[2]][[1]]$strain
472
	big_roi = replicates[[1]][[1]]$roi
473
  orig_big_roi = replicates[[1]][[1]]$orig_roi
474
	if(big_roi$end - big_roi$begin < 0) {
475
		tmp_begin = big_roi$begin
476
		big_roi$begin =  big_roi$end
477
		big_roi$end =  tmp_begin
478
	}
479
	# GO!
480
	if (is.null(wp_nucs_strain_ref1)) {
481
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
482
	}
483
	if (is.null(wp_nucs_strain_ref2)) {
484
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
485
  }
486
  # foo <<- wp_nucs_strain_ref1
487
  # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
488
  # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
489
	# dealing with matching_nas
490
	lws = c()
491
	ups = c()
492
	for (na in wp_nucs_strain_ref2) {
493
		lws = c(lws, na$lower_bound)
494
		ups = c(ups, na$upper_bound)
495
	}
496

    
497
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))			
498
	roi_strain_ref1 = NULL
499
	roi_strain_ref2 = NULL
500
	if (length(wp_nucs_strain_ref1) > 0) {
501
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
502
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
503
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
504
			# Filtering on Well Positionned
505
			if (nuc_strain_ref1$wp) {
506
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
507
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi=orig_big_roi, config=config) 
508
        if (!is.null(roi_strain_ref2)){
509
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
510
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
511
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
512
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
513
					for (index_nuc_strain_ref2 in matching_nas) {
514
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
515
						# Filtering on Well Positionned
516
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)             
517
						if (!is.null(translate_roi(nuc_strain_ref2_to_roi, strain_ref1, big_roi=orig_big_roi, config=config)) &  
518
                nuc_strain_ref2$wp) {
519
							# Filtering on correlation Score and collecting reads					
520
							SKIP = FALSE
521
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
522
							reads_strain_ref1 = c()
523
							for (nuc in nuc_strain_ref1$nucs){
524
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)							
525
								if (nuc$corr < corr_thres) {
526
									SKIP = TRUE
527
								}
528
							}
529
							reads_strain_ref2 = c()
530
							for (nuc in nuc_strain_ref2$nucs){
531
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)							
532
								if (nuc$corr < corr_thres) {
533
									SKIP = TRUE
534
								}
535
							}
536
							# Filtering on correlation Score						
537
							if (!SKIP) {
538
								# tranlation of reads into strain 2 coords
539
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
540
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
541
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
542
								lod_scores = c(lod_scores, lod_score)
543
								# Filtering on LOD Score						
544
								if (lod_score > lod_thres) {
545
									tmp_nuc = list()
546
									# strain_ref1
547
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
548
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
549
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
550
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
551
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
552
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
553
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
554
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
555
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
556
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
557
									# strain_ref2
558
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
559
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
560
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
561
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
562
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
563
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
564
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
565
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
566
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
567
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
568
									# common
569
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
570
									# print(tmp_nuc)
571
									common_nuc = dfadd(common_nuc, tmp_nuc)
572
								}
573
							}
574
						}
575
					}				
576
				} else {
577
		      print("WARNING! No roi for strain ref 2.")
578
			  }
579
		  }
580
		}
581
		
582
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
583
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
584
			to_remove_list = c()			
585
			for (i in 1:length(index_redundant)) {	
586
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
587
				  to_remove = index_redundant[i]
588
				}	 else {
589
					to_remove = index_redundant[i] + 1
590
			  } 		
591
				to_remove_list = c(to_remove_list, to_remove)
592
			}
593
			common_nuc = common_nuc[-to_remove_list,]
594
		}
595
		
596
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
597
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
598
			to_remove_list = c()			
599
			for (i in 1:length(index_redundant)) {	
600
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
601
				  to_remove = index_redundant[i]
602
				}	 else {
603
					to_remove = index_redundant[i] + 1
604
			  } 		
605
				to_remove_list = c(to_remove_list, to_remove)
606
			}
607
			common_nuc = common_nuc[-to_remove_list,]
608
		}
609
				
610
		return(list(common_nuc, lod_scores))
611
	} else {
612
		print("WARNING, no nucs for strain_ref1.")
613
		return(NULL)
614
	}
615
### Returns a list of clusterized nucleosomes, and all computed lod scores.
616
}, ex=function(){	
617

    
618
    # Define new translate_roi function...
619
    translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
620
      return(roi)
621
    }
622
    # Binding it by uncomment follwing lines.
623
    unlockBinding("translate_roi", as.environment("package:nucleominer"))
624
    unlockBinding("translate_roi", getNamespace("nucleominer"))
625
    assign("translate_roi", translate_roi, "package:nucleominer")
626
    assign("translate_roi", translate_roi, getNamespace("nucleominer"))
627
    lockBinding("translate_roi", getNamespace("nucleominer"))
628
    lockBinding("translate_roi", as.environment("package:nucleominer"))  
629

    
630
	# Dealing with a region of interest
631
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
632
	roi2 = translate_roi(roi, roi$strain_ref1)
633
	replicates = list()
634
	for (j in 1:2) {
635
		samples = list()
636
		for (i in 1:3) {
637
			# Create TF output
638
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
639
			outputs = dfadd(NULL,tf_nuc)
640
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
641
			# Generate corresponding reads
642
			nb_reads = round(runif(1,170,230))
643
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
644
			u_reads = sort(unique(reads))
645
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
646
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
647
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
648
			u_reads = u_reads + shifts
649
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), 
650
			                         "V2" = u_reads, 
651
															 "V3" = strands, 
652
															 "V4" = counts), stringsAsFactors=FALSE)
653
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
654
		}
655
		replicates[[length(replicates) + 1]] = samples
656
	}
657
	print(align_inter_strain_nucs(replicates))
658
})
659

    
660

    
661

    
662

    
663

    
664

    
665

    
666

    
667

    
668

    
669

    
670

    
671

    
672

    
673

    
674

    
675

    
676

    
677

    
678

    
679

    
680

    
681

    
682

    
683

    
684

    
685
fetch_mnase_replicates = function(# Prefetch data
686
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
687
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
688
roi, ##<< Region of interest.
689
all_samples, ##<< Global list of samples.
690
config=NULL, ##<< GLOBAL config variable
691
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
692
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
693
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
694
) {
695
	samples=list()
696
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
697
	for (i in samples_ids) {
698
		sample = as.list(all_samples[all_samples$id==i,])
699
    sample$orig_roi = roi
700
    sample$roi = translate_roi(roi, sample$strain, config = config)
701
		if (get_genome) {
702
			# Get Genome
703
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
704
		}
705
		# Get inputs
706
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) 
707
		sample$total_reads = sum(sample$inputs[,4]) 	
708
		if (!only_fetch) {
709
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
710
	  }
711
	  # Get TF outputs for Mnase_Seq samples
712
		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
713
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
714
			if (!only_fetch) {
715
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
716
  		}
717
		}
718
		samples[[length(samples) + 1]] = sample		
719
	}
720
  return(samples)
721
}
722

    
723
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
724
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
725
region1, ##<< Original regions. 
726
region2 ##<< Regions to substract.
727
) {
728
  rec_substract_region = function(region1, region2) {
729
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
730
    cur_fuzzy = region1[i,]
731
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
732
    if (length(inter_wp[,1]) > 0) {
733
      ret = c()
734
      for (j in 1:length(inter_wp[,1])) {
735
        cur_wp = inter_wp[j,]
736
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
737
          # remove cur_fuzzy
738
          ret = c()
739
          break
740
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
741
          # crop fuzzy 
742
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1 
743
          ret = cur_fuzzy
744
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
745
          # crop fuzzy 
746
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1 
747
          ret = cur_fuzzy
748
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
749
          # split fuzzy
750
          tmp_ret_fuzzy_1 = cur_fuzzy
751
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1 
752
          tmp_ret_fuzzy_2 = cur_fuzzy
753
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1 
754
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
755
          # print(ret)
756
          # ret = cur_fuzzy
757
          break
758
        } else {
759
          stop("WARNING NO ADAPTED CASE!")
760
        }          
761
      }
762
      return(ret)
763
    } else {
764
      return(cur_fuzzy)
765
    }
766
  })
767
  }
768
  non_inter_fuzzy = rec_substract_region(region1, region2)
769
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
770
  tmp_ulist = unlist(non_inter_fuzzy)
771
  tmp_names = names(tmp_ulist)[1:4]
772
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE) 
773
  names(non_inter_fuzzy) = tmp_names
774
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)     
775
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)     
776
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
777
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
778
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]      
779
  return(non_inter_fuzzy)
780
}
781

    
782
union_regions = function(# Aggregate regions that intersect themnselves.
783
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.  
784
regions ##<< The Regions to be aggregated
785
) {
786
  old_length = length(regions[,1])
787
  new_length = 0
788
    
789
  while (old_length != new_length) {
790
    regions = regions[order(regions$lower_bound), ]
791
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 0, TRUE)      
792
    vec_end_1 = which(regions$stop)
793
    if (length(vec_end_1) == 0) {
794
      vec_end_1 = c(length(regions$stop))      
795
    } 
796
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
797
      vec_end_1 = c(vec_end_1, length(regions$stop))
798
    }
799
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
800
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
801
      chr = regions$chr[vec_beg_1[i]]
802
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
803
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
804
      roi_index = regions$roi_index[vec_beg_1[i]] 
805
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, roi_index=roi_index))
806
      })
807
    union = collapse_regions(union)
808
    old_length = length(regions[,1])
809
    new_length = length(union[,1])
810
    regions = union
811
  }
812
  return(union)
813
}
814

    
815
remove_aligned_wp = function(# Remove wp nucs from common nucs list.
816
### It is based on common wp nucs index on nucs and region.  
817
strain_maps, ##<< Nuc maps.
818
roi_index, ##<< The region of interest index.
819
tmp_common_nucs, ##<< the list of wp nucs.
820
strain##<< The strain to consider.
821
){
822
  fuzzy_nucs = strain_maps[[strain]]
823
  fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$roi_index == roi_index,]
824
  fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
825
  if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
826
  if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
827
  anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
828
  fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
829
  return(fuzzy_nucs)
830
}
831

    
832
translate_regions = function(# Translate a list of regions from a strain ref to another.
833
### This function is an eloborated call to translate_roi.
834
regions, ##<< Regions to be translated. 
835
combi, ##<< Combination of strains.
836
roi_index, ##<< The region of interest index.
837
config=NULL, ##<< GLOBAL config variable
838
roi ##<< The region of interest.
839
) {
840
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
841
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
842
    big_roi =  roi
843
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config = config, big_roi = big_roi) 
844
    data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
845
    })
846
  return(collapse_regions(tr_regions))
847
}
848

    
849
collapse_regions = function(# reformat an "apply  manipulated" list of regions
850
### Utils to reformat an "apply  manipulated" list of regions
851
regions ##< a list of regions
852
) {
853
  regions = do.call(rbind, regions)      
854
  regions$chr = as.character(regions$chr)     
855
  regions$chr = as.numeric(regions$chr)     
856
  regions$lower_bound = as.numeric(regions$lower_bound)
857
  regions$upper_bound = as.numeric(regions$upper_bound)
858
  regions = regions[order(regions$lower_bound),]      
859
  return(regions)  
860
}
861

    
862
extract_wp = function(# Extract wp nucs from nuc map. 
863
### Function based on common wp nuc index and roi_index.
864
strain_maps, ##<< Nuc maps.
865
roi_index, ##<< The region of interest index.
866
strain, ##<< The strain to consider.
867
tmp_common_nucs ##<< the list of wp nucs.
868
) {
869
  wp_nucs = apply(t(tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]), 2, function(i) {
870
    tmp_wp_nucs = strain_maps[[strain]]
871
    tmp_wp_nucs = tmp_wp_nucs[tmp_wp_nucs$roi_index == roi_index & tmp_wp_nucs$index_nuc == i,]
872
    return(tmp_wp_nucs)
873
    })
874
  return(collapse_regions(wp_nucs))
875
}
876

    
877

    
878

    
879

    
880

    
881
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
882
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
883
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
884
roi, ##<< The region of interest.
885
strain, ##<< The strain to consider.
886
config=NULL ##<< GLOBAL config variable
887
) {
888
  tr_roi = translate_roi(roi, strain, config = config)
889
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
890
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
891
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
892
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
893
  }
894
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
895
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
896
  }
897
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
898
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
899
  }
900
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
901
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
902
  }
903
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
904
  return(tmp_fuzzy_nucs)
905
}
906

    
907

    
908
get_fuzzy = function(# Compute the fuzzy nucs.
909
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting fuzzy regions. It will be take into account in the count read part og the pipeline.
910
combi, ##<< The strain combination to consider.
911
roi, ##<< The region of interest.
912
roi_index, ##<< The region of interest index.
913
strain_maps, ##<< Nuc maps.
914
common_nuc_results, ##<< Common wp nuc maps
915
config=NULL ##<< GLOBAL config variable
916
) {
917
  print(roi_index)
918
  PLOT = FALSE
919
  tmp_common_nucs = common_nuc_results[[paste(combi[1], combi[2], sep="_")]]
920
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$roi_index == roi_index, ]
921

    
922
  print(paste("Dealing with fuzzy from", combi[1]))
923
  tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
924
  tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
925
  if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
926
  agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)    
927
  if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
928
    lines(c(agg_fuzzy_1[i,]$lower_bound, agg_fuzzy_1[i,]$upper_bound), c(+3.1,+3.1), col=2)
929
  }
930

    
931
  print(paste("Dealing with fuzzy from ", combi[2]))
932
  tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
933
  if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
934
  agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)      
935
  agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
936
  tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi=roi, config=config)
937
  tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
938
  # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
939
  if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
940
    lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
941
  }
942
  
943
  print("Dealing with fuzzy from both...")
944
  all_fuzzy = union_regions(rbind(agg_fuzzy_1, tr_agg_fuzzy_2))
945
  if (PLOT) for (i in 1:length(all_fuzzy[,1])) {
946
    lines(c(all_fuzzy[i,]$lower_bound, all_fuzzy[i,]$upper_bound), c(+3.2, +3.2), col=1)
947
  }
948
  
949
  print(paste("Dealing with wp from", combi[1]))
950
  wp_nucs_1 = extract_wp(strain_maps, roi_index, combi[1], tmp_common_nucs)
951
  if (PLOT) for (i in 1:length(wp_nucs_1[,1])) {
952
    lines(c(wp_nucs_1[i,]$lower_bound, wp_nucs_1[i,]$upper_bound), c(+3.5,+3.5), col=3)
953
  }
954

    
955
  print(paste("Dealing with wp from", combi[2]))
956
  wp_nucs_2 = extract_wp(strain_maps, roi_index, combi[2], tmp_common_nucs)
957
  tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, roi_index, roi=roi, config=config)
958
  if (PLOT) for (i in 1:length(tr_wp_nucs_2[,1])) {
959
    lines(c(tr_wp_nucs_2[i,]$lower_bound, tr_wp_nucs_2[i,]$upper_bound), c(+3.7,+3.7), col=3)
960
  }
961

    
962
  print("Dealing with wp from both...")
963
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
964
  if (PLOT) for (i in 1:length(all_wp[,1])) {
965
    lines(c(all_wp[i,]$lower_bound, all_wp[i,]$upper_bound), c(+3.6, +3.6), col=1)
966
  }  
967

    
968
  print("Dealing with fuzzy and wp...")
969
  non_inter_fuzzy = substract_region(all_fuzzy, all_wp)
970
  if (is.null(non_inter_fuzzy)) { return(NULL) }
971
  
972
  non_inter_fuzzy$len = non_inter_fuzzy$upper_bound - non_inter_fuzzy$lower_bound
973
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
974
  if (PLOT) for (i in 1:length(non_inter_fuzzy[,1])) {
975
    lines(c(non_inter_fuzzy[i,]$lower_bound, non_inter_fuzzy[i,]$upper_bound), c(+3.9, +3.9), col=1)
976
  }
977
  
978
  non_inter_fuzzy$index_nuc = 1:length(non_inter_fuzzy[,1])
979
  return (non_inter_fuzzy)
980
}
981

    
982

    
983

    
984
get_all_reads = function(# Retrieve Reads 
985
### Retrieve reads for a given marker, combi, form.
986
marker, ##<< The marker to considere.
987
combi, ##<< The starin combination to considere.
988
form="wp" ##<< The nuc form to considere.
989
) {
990
	all_reads = NULL
991
  for (manip in c("Mnase_Seq", marker)) {
992
    if (form == "fuzzy") {
993
		  out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
994
  		tmp_res = read.table(file=out_filename, header=TRUE)				
995
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
996
      tmp_res$form = form
997
    } else if (form == "wp") {
998
		 	out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
999
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1000
      tmp_res$form = form
1001
    } else if (form == "wpfuzzy") {
1002
		 	out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1003
  		tmp_res = read.table(file=out_filename, header=TRUE)				
1004
      tmp_res$form = "wp"
1005
		  out_filename = paste("results/2013-08/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="")
1006
  		tmp_res2 = read.table(file=out_filename, header=TRUE)				
1007
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
1008
      tmp_res2$form = "fuzzy"
1009
      tmp_res = rbind(tmp_res, tmp_res2)
1010
    }
1011
		if (is.null(all_reads)) {
1012
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
1013
		}     
1014
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
1015
		all_reads = cbind(all_reads, tmp_res)
1016
  }
1017
  return(all_reads)	
1018
}
1019

    
1020
get_design = function(# Build the design for deseq
1021
### This function build the design according sample properties.
1022
marker, ##<< The marker to considere.
1023
combi, ##<< The starin combination to considere.
1024
all_samples ##<< Global list of samples.
1025
) {
1026
  off1 = 0
1027
  off2 = 0
1028
	manips = c("Mnase_Seq", marker)
1029
	design_rownames = c()
1030
	design_manip = c()
1031
	design_strain = c()
1032
  off2index = function(off) {	
1033
  	switch(toString(off), 
1034
  		"1"=c(0,1,1),
1035
  	  "2"=c(1,0,1),
1036
    	"3"=c(1,1,0), 
1037
  		c(1,1,1)
1038
  		)	
1039
  }
1040
	for (manip in manips) {
1041
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
1042
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
1043
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
1044
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
1045
		} else {
1046
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1047
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1048
			}
1049
		}
1050
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))				
1051
		for (strain in combi) {			
1052
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1053
			design_strain = c(design_strain, rep(strain, length(cols)))
1054
			design_rownames = c(design_rownames, cols)
1055
		}
1056
	}
1057
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1058
	return(snep_design)	
1059
}
1060

    
1061
plot_dist_samples = function(# Plot the distribution of reads.
1062
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1063
strain, ##<< The strain to considere.
1064
marker, ##<< The marker to considere.
1065
res, ##<< Data
1066
all_samples, ##<< Global list of samples.
1067
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1068
) {			
1069
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1070
	snepCountTable = res[,cols]
1071
	snepDesign = data.frame(
1072
		row.names = cols,
1073
		manip = rep(marker, length(cols)),
1074
		strain = rep(strain, length(cols))
1075
		)
1076
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1077
	sizeFactors = estimateSizeFactors(cdsFull)
1078
	# print(sizeFactors[[1]])
1079
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1080
	if (NEWPLOT) {
1081
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))				
1082
		NEWPLOT = FALSE
1083
	}
1084
	for (it in 1:length(sample_ids)) {
1085
		sample_id = sample_ids[it]
1086
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1087
	}
1088
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1089
}
1090

    
1091
analyse_design = function(# Launch deseq methods.
1092
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1093
snep_design, ##<< The design to considere.
1094
reads ##<< The data to considere. 
1095
) {
1096
	snep_count_table = reads[, rownames(snep_design)]
1097
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1098
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1099
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1100

    
1101
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1102
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1103

    
1104
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1105
	return(list(fit1, fit0, snep_design, pvalsGLM))	
1106
}
1107

    
1108

    
1109

    
1110

    
1111

    
1112

    
1113

    
1114

    
1115
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1116
### This function uses 
1117
marker, ##<< The marker involved.
1118
combi, ##<< The strain combination involved.
1119
form, ##<< the nuc form involved. 
1120
all_samples ##<< Global list of samples.
1121
) {
1122
  # PRETREAT
1123
  d = get_design(marker, combi, all_samples)
1124
  reads = get_all_reads(marker, combi, form)
1125
  # RUN ANALYSE
1126
  tmp_analyse = analyse_design(d, reads)
1127
  # RESULTS
1128
	fit1 = tmp_analyse[[1]]
1129
	fit0 = tmp_analyse[[2]]
1130
  k = names(fit1)
1131
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1132
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1133
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1134
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1135
	snep_design = tmp_analyse[[3]]
1136
  print(snep_design)
1137
	fdr = 0.0001
1138
	thres = FDR(reads$pvalsGLM, fdr) 
1139
	reads$snep_index = reads$pvalsGLM < thres
1140
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1141
  return(reads)
1142
  },  ex=function(){	
1143
    marker = "H3K4me1"
1144
    combi = c("BY", "YJM") 
1145
    form = "wpfuzzy" # "wp" | "fuzzy" | "wpfuzzy"
1146
    # foo = get_sneps(marker, combi, form)
1147
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1148
})
1149

    
1150

    
1151

    
1152

    
1153

    
1154

    
1155

    
1156

    
1157

    
1158

    
1159

    
1160

    
1161

    
1162

    
1163

    
1164

    
1165

    
1166

    
1167

    
1168

    
1169

    
1170

    
1171

    
1172

    
1173
ROM2ARAB = function(# Roman to Arabic pair list.
1174
### Util to convert Roman to Arabic
1175
){list(
1176
  "I" = 1,
1177
  "II" = 2,
1178
  "III" = 3,
1179
  "IV" = 4,
1180
  "V" = 5,
1181
  "VI" = 6,
1182
  "VII" = 7,
1183
  "VIII" = 8,
1184
  "IX" = 9,
1185
  "X" = 10,
1186
  "XI" = 11,
1187
  "XII" = 12,
1188
  "XIII" = 13,
1189
  "XIV" = 14,
1190
  "XV" = 15,
1191
  "XVI" = 16,
1192
  "XVII" = 17,
1193
  "XVIII" = 18,
1194
  "XIX" = 19,
1195
  "XX" = 20
1196
)}
1197

    
1198
switch_pairlist = structure(function(# Switch a pairlist
1199
### Take a pairlist key:value and return the switched pairlist value:key.
1200
l ##<< The pairlist to switch. 
1201
) {
1202
	ret = list()
1203
	for (name in names(l)) {
1204
		ret[[as.character(l[[name]])]] = name
1205
	}
1206
	ret
1207
### The switched pairlist.
1208
}, ex=function(){
1209
	l = list(key1 = "value1", key2 = "value2")
1210
	print(switch_pairlist(l))
1211
})
1212

    
1213
ARAB2ROM = function(# Arabic to Roman pair list.
1214
### Util to convert Arabicto Roman
1215
){switch_pairlist(ROM2ARAB())}
1216

    
1217

    
1218

    
1219

    
1220
translate_roi = structure(function(# Translate coords of a genome region.
1221
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1222
roi, ##<< Original genome region of interest.
1223
strain2, ##<< The strain in wich you want the genome region of interest.
1224
config=NULL, ##<< GLOBAL config variable
1225
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1226
) {
1227
	strain1 = roi$strain_ref
1228
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1229
	if (strain1 == strain2) {
1230
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1231
		return(roi)
1232
	}
1233
	# Launch c2c file
1234
	if (reverse) {
1235
		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1236
	} else {
1237
		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1238
	}
1239
	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1240
	# filtering it
1241
  c2c = c2c[c2c$V6=="-",]
1242
	# Reverse
1243
	if (reverse) {
1244
		tmp_col = c2c$V1
1245
		c2c$V1 = c2c$V7
1246
		c2c$V7 = tmp_col
1247
		tmp_col = c2c$V2
1248
		c2c$V2 = c2c$V9
1249
		c2c$V9 = tmp_col
1250
		tmp_col = c2c$V3
1251
		c2c$V3 = c2c$V10
1252
		c2c$V10 = tmp_col
1253
	}
1254
	# Restrict c2c to big_roi
1255
	# if (FALSE) {
1256
	if (!is.null(big_roi)) {
1257
		if (roi$strain_ref != big_roi$strain_ref) {
1258
      # print("WARNING, big_roi and roi not in the same strain_ref... translating big_roi.")
1259
      big_roi = translate_roi(big_roi, roi$strain_ref, config=config)
1260
    }
1261
    if (big_roi$end < big_roi$begin) {
1262
      tmp_var = big_roi$begin
1263
      big_roi$begin = big_roi$end 
1264
      big_roi$end = tmp_var        
1265
      big_roi$length = big_roi$length        
1266
    }    
1267
    if (big_roi$chr!=roi$chr | roi$end > big_roi$end | roi$end < big_roi$begin | roi$begin > big_roi$end | roi$begin < big_roi$begin) {
1268
      print("WARNING! Trying to translate a roi not included in a big_roi.")
1269
      return(NULL)
1270
    }
1271
		if (strain1 == "BY") {
1272
			big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="")
1273
		} else if (strain1 == "RM") {			
1274
		  big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="")
1275
		} else if (strain1 == "YJM") {			
1276
		  big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]]
1277
		}
1278
		big_begin_1 = big_roi$begin
1279
	  big_end_1 = big_roi$end
1280
		c2c = c2c[c2c$V1==big_chro_1,]
1281
    if (length(c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3,c("V2", "V3") ] = big_begin_1}
1282
    if (length(c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > big_end_1   & c2c$V2 < c2c$V3, c("V2", "V3")] = big_end_1}
1283
    if (length(c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2,c("V2", "V3") ] = big_begin_1}
1284
    if (length(c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > big_end_1   & c2c$V3 < c2c$V2, c("V2", "V3")] = big_end_1}
1285
    c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1286
	}
1287
  #	Convert initial roi$chr into c2c format
1288
	if (strain1 == "BY") {
1289
		chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="")
1290
	} else if (strain1 == "RM") {			
1291
	  chro_1 = paste("supercontig_1.",roi$chr,sep="")
1292
	} else if (strain1 == "YJM") {			
1293
	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]]
1294
	}
1295
	begin_1 = roi$begin
1296
  end_1 = roi$end
1297
  # Computing equivalent strain_2 alignment coordinates	
1298
	if (reverse) {
1299
  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1300
    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1301
	} else {
1302
		tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1303
	  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1304
	}
1305
	# Never happend conditions ...	
1306
	{
1307
		if (length(tmptransfostart$V8) == 0) {
1308
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1309
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1310
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
1311
  			begin_1 = min(tmp_c2c$V2)
1312
      } else {
1313
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1,]
1314
  			begin_1 = min(tmp_c2c$V3)
1315
      }
1316
			if (reverse) {
1317
		  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1318
			} else {
1319
				tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1320
			}
1321
			if (length(tmptransfostart$V8) == 0) {
1322
				if (!is.null(big_roi)) {
1323
					return(NULL)
1324
					tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
1325
				} else { 
1326
					# return(NULL)
1327
					# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,])
1328
					# print(c2c[c2c$V1==chro_1,])
1329
					print(tmptransfostart)
1330
					print(tmptransfostop)
1331
					stop("Never happend condition 1.")					
1332
				}
1333
			}
1334
		} 
1335
		if (length(tmptransfostop$V8) == 0) {
1336
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1337
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1338
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
1339
  			end_1 = max(tmp_c2c$V3)
1340
      } else {
1341
  			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2<=end_1,]
1342
  			end_1 = max(tmp_c2c$V2)
1343
      }
1344
			if (reverse) {
1345
		    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1346
			} else {
1347
			  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1348
			}
1349
			if (length(tmptransfostop$V8) == 0) {
1350
				if (!is.null(big_roi)) {
1351
					return(NULL)
1352
				  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
1353
				} else {
1354
					# return(NULL)
1355
					print(c2c[c2c$V1==chro_1,])
1356
					print(tmptransfostart)
1357
					print(tmptransfostop)
1358
					stop("Never happend condition 2.")
1359
				} 
1360
			} 
1361
		}
1362
		if (length(tmptransfostart$V8) != 1) {
1363
			# tmptransfostart = tmptransfostart[1,]
1364
			# print("many start")
1365
			# print(c2c[c2c$V1==chro_1,])
1366
			tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1367
			if (length(tmptransfostart$V8) != 1) {
1368
				# return(NULL)
1369
				print(tmptransfostart)
1370
				print(tmptransfostop)
1371
  			stop("Never happend condition 3.")
1372
			}
1373
		}
1374
		if (length(tmptransfostop$V8) != 1) {
1375
			# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),]
1376
			# print("many stop")
1377
			# print(tmptransfostop)
1378
			# print(roi)
1379
		  tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1380
			if (length(tmptransfostop$V8) != 1) {
1381
				# return(NULL)
1382
				print(tmptransfostart)
1383
				print(tmptransfostop)
1384
  			stop("Never happend condition 4.")
1385
			}
1386
		}		
1387
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1388
			print(tmptransfostart)
1389
			print(tmptransfostop)		
1390
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1391
		}
1392
	} 
1393
  # Deal with strand
1394
  if (tmptransfostart$V8 == 1) {
1395
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1396
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1397
  } else {
1398
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1399
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1400
  }
1401
	# Build returned roi
1402
	roi$strain_ref = strain2
1403
	if (roi$strain_ref == "BY") {
1404
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1405
	} else {
1406
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1407
	}
1408
  roi$begin = begin_2
1409
  roi$end = end_2
1410
	if (sign(roi$end - roi$begin) == 0) {
1411
		roi$length = 1			
1412
	} else {		
1413
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1414
	}
1415
  return(roi)  
1416
}, ex=function(){
1417
	# Define new translate_roi function...
1418
	translate_roi = function(roi, strain2, config) {
1419
		strain1 = roi$strain_ref
1420
		if (strain1 == strain2) {
1421
			return(roi)
1422
		} else {
1423
		  stop("Here is my new translate_roi function...")		
1424
		}	
1425
	}
1426
	# Binding it by uncomment follwing lines.
1427
	# unlockBinding("translate_roi", as.environment("package:nm"))
1428
	# unlockBinding("translate_roi", getNamespace("nm"))
1429
	# assign("translate_roi", translate_roi, "package:nm")
1430
	# assign("translate_roi", translate_roi, getNamespace("nm"))
1431
	# lockBinding("translate_roi", getNamespace("nm"))
1432
	# lockBinding("translate_roi", as.environment("package:nm"))	
1433
})
1434

    
1435

    
1436

    
1437

    
1438

    
1439

    
1440

    
1441
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1442
### CURs are regions that can be aligned between the genomes 
1443
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
1444
min_cur_width = 200, ##<< The minimum width of a CUR
1445
config = NULL, ##<< GLOBAL config variable
1446
plot = FALSE ##<< Plot CURs or not
1447
) {
1448
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
1449
  	c2c = get_content(config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]], "table", stringsAsFactors=FALSE)
1450
    # Filtering unagapped
1451
    c2c = c2c[c2c$V6=="-",]
1452
    # filtering some things (chr...)
1453
    # c2c = c2c[c2c$V1 == "chrIV",]
1454
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] 
1455
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] 
1456
  	# Plot diffs to define a threshold (diff_allowed)
1457
  	# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
1458
    # Filtering
1459
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1460
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1461

    
1462
    rois = NULL	
1463
  	for(i in 1:length(indexes_start)) {
1464
  		start = indexes_start[i]
1465
  		stop = indexes_stop[i]
1466
  		sub_c2c = c2c[start:stop,]
1467
  		if (strain1 == "BY") {
1468
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1469
  		} else {			
1470
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1471
  		}
1472
  		roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
1473
  		roi[["length"]] = roi$end - roi$begin
1474
  		if (roi$length >= min_cur_width) {
1475
        rois = dfadd(rois,roi)
1476
  	  }
1477
  		if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1478
  			print("*************** ERROR, non homogenous region! ********************")
1479
  		}
1480
  		# print(i)
1481
  		# print(roi)
1482
  		# print(sub_c2c)
1483
  		# print("________________________________________________________________")
1484
  	}
1485
  	if (plot) {
1486
  		print(paste(length(indexes_stop), "area of interest."))
1487
  	  # Plot rois
1488
  	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[strain1]], "fasta")   
1489
  		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1490
  		for (name in names(genome)) {
1491
  			if (strain1 == "BY") {
1492
  				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
1493
  			} else {			
1494
  				chr_ref = name
1495
  			}
1496
  			y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
1497
  			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1498
  			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1499
  		}
1500
  	  col=1
1501
  	  for (roi_index in 1:length(rois$chr)) {
1502
  			roi = rois[roi_index,]	
1503
  			y_lev = as.integer(roi$chr) + 0.3
1504
  			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1505
  			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1506
  	  	col = col + 1
1507
  	  }	
1508
  	}
1509
  	return(rois)
1510
  }
1511
	rois = NULL	
1512
	rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1513
	rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1514
	for (roi_1_index in 1:length(rois_BY_RM[,1])) {
1515
		roi_1 = rois_BY_RM[roi_1_index,]
1516
		roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;  
1517
		# print(length(roi_2_candidates[,1]))
1518
		if (length(roi_2_candidates[,1]) > 0) {
1519
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1520
				roi_2 = roi_2_candidates[roi_2_index,]
1521
				roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
1522
				roi[["length"]] = roi$end - roi$begin + 1
1523
				if (roi$length >= min_cur_width) {
1524
					# if (length(rois[,1]) == 153) {
1525
					# 	print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
1526
					# 	print(roi_1)
1527
					# 	print(roi_2)
1528
					# 	print(roi)
1529
					# }
1530
			    rois = dfadd(rois,roi)
1531
			  }			
1532
			}
1533
		}
1534
	}
1535
	print(length(rois[,1]))
1536
	print(sum(rois$length))
1537
	rois_1st_round = rois
1538
	rois_2nd_round = NULL	
1539
	rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1540
	for (roi_1_index in 1:length(rois_1st_round[,1])) {
1541
		roi_1 = rois_1st_round[roi_1_index,]
1542
		translated_roi_1 = translate_roi(roi_1, "RM", config = config)
1543
		t_b = min(translated_roi_1$begin, translated_roi_1$end)
1544
		t_e = max(translated_roi_1$begin, translated_roi_1$end)
1545
		roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;  
1546
		if (length(roi_2_candidates[,1]) > 0) {
1547
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1548
				roi_2 = roi_2_candidates[roi_2_index,]
1549
				roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
1550
				roi[["length"]] = roi$end - roi$begin + 1
1551
				if (roi$length >= min_cur_width) {
1552
			    rois_2nd_round = dfadd(rois_2nd_round,roi)
1553
			  }			
1554
			}
1555
		}
1556
	}
1557
	print(length(rois_2nd_round[,1]))
1558
	print(sum(rois_2nd_round$length))
1559
	rois = rois_2nd_round
1560

    
1561
	rois_translator_round = list()	
1562
	for (roi_index in 1:length(rois[,1])) {
1563
		roi = rois[roi_index,]
1564
		BY_roi  = translate_roi(roi, "BY", config = config)
1565
		tmp_BY_roi = BY_roi 
1566
		BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1567
		BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1568
		BY_roi$length = abs(BY_roi$length)
1569
		rois_translator_round = dfadd(rois_translator_round, BY_roi)
1570
	}
1571
	rois = rois_translator_round
1572

    
1573
	rois_3rd_round = NULL	
1574
	for (roi_index in 1:length(rois[,1])) {
1575
	# for (roi_index in 1:2) {
1576
		current_roi = rois[roi_index,]
1577
		# print(roi_index)
1578
	
1579
	  to_be_check_rois = dfadd(NULL, current_roi)
1580
		NEED_RERUN = TRUE
1581
		while (NEED_RERUN) {
1582
			# print("RERUN"),
1583
			NEED_RERUN = FALSE
1584
			to_be_check_again = NULL
1585
			for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
1586
				# print(to_be_check_rois)
1587
				to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
1588
		    combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
1589
			  for (combi in combis) {
1590
					# print(combi)
1591
			    strain1 = combi[1]
1592
		      strain2 = combi[2]
1593
					trans_roi = translate_roi(to_be_check_roi, strain1, config = config)
1594
					lower_bound=min(trans_roi$begin, trans_roi$end)
1595
					upper_bound=max(trans_roi$begin, trans_roi$end)			
1596
          
1597
          check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
1598
            reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1599
          	if (strain1 == strain2) {
1600
          		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1601
          		return(roi)
1602
          	}
1603
          	# Launch c2c file
1604
          	if (reverse) {
1605
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1606
          	} else {
1607
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1608
          	}
1609
          	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1610
          	# filtering it
1611
            c2c = c2c[c2c$V6=="-",]
1612
          	# Reverse
1613
          	if (reverse) {
1614
          		tmp_col = c2c$V1
1615
          		c2c$V1 = c2c$V7
1616
          		c2c$V7 = tmp_col
1617
          		tmp_col = c2c$V2
1618
          		c2c$V2 = c2c$V9
1619
          		c2c$V9 = tmp_col
1620
          		tmp_col = c2c$V3
1621
          		c2c$V3 = c2c$V10
1622
          		c2c$V10 = tmp_col
1623
          	}
1624

    
1625
          	if (strain1 == "BY") {
1626
          		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1627
          	} else if (strain1 == "RM") {			
1628
          	  chro_1 = paste("supercontig_1.",chr,sep="")
1629
          	} else if (strain1 == "YJM") {			
1630
          	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1631
          	}
1632
          	# print(chro_1)
1633
          	if (!is.null(lower_bound) & !is.null(upper_bound)) {
1634
              if (reverse) {
1635
          	  	tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
1636
          		} else {
1637
          			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
1638
          		}
1639
          	} else {
1640
            	tmp_c2c = c2c[c2c$V1 == chr,]
1641
          	}
1642
          	if (length(tmp_c2c[,1]) > 1) {
1643
          		pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
1644
          			# print(paste(i, "/", length(tmp_c2c[,1])))
1645
          			apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
1646
          				l1 = tmp_c2c[i,] 
1647
          				b1 = min(l1$V2, l1$V3)
1648
          				e1 = max(l1$V2, l1$V3)
1649
          				l2 = tmp_c2c[j,]
1650
          				b2 = min(l2$V2, l2$V3)
1651
          				e2 = max(l2$V2, l2$V3)
1652
          				if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
1653
          					print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))				
1654
          					pb = list(strain1, strain2, chr, b1, e1, b2, e2)					
1655
          					pb
1656
          				} else {
1657
          					NULL
1658
          				}
1659
          			})
1660
          		})
1661
          		return(pbs)
1662
          	}
1663

    
1664
          }, ex=function(){
1665
          	source("src/nucleo_miner/yeast_strain_conversion.R"); 
1666
          	pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
1667
          	pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
1668
          	pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
1669
          	pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
1670
          	pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
1671
          	pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
1672
          })
1673
          
1674

    
1675
					res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) 
1676
					if (!is.null(res)) {
1677
						df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)		
1678
						interval = df_res[1,]
1679
						inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
1680
						inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
1681
						# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))			
1682
						new_roi1 = trans_roi
1683
						new_roi2 = trans_roi
1684
						new_roi1$begin = lower_bound
1685
						new_roi1$end = inter_min - 1
1686
						new_roi1$length = new_roi1$end - new_roi1$begin + 1
1687
						new_roi2$begin = inter_max + 1
1688
						new_roi2$end = upper_bound
1689
						new_roi2$length = new_roi2$end - new_roi2$begin + 1
1690
						if (new_roi1$length > min_cur_width) {
1691
							BY_roi  = translate_roi(new_roi1, "BY", config = config)
1692
							tmp_BY_roi = BY_roi
1693
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1694
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1695
							BY_roi$length = abs(BY_roi$length)
1696
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1697
						}
1698
						if (new_roi2$length > min_cur_width) {
1699
							BY_roi  = translate_roi(new_roi2, "BY", config = config)
1700
							tmp_BY_roi = BY_roi
1701
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1702
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1703
							BY_roi$length = abs(BY_roi$length)
1704
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1705
						}
1706
						if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
1707
							for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
1708
								to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])								
1709
							}
1710
						}
1711
						NEED_RERUN = TRUE
1712
						break
1713
					}
1714
				}
1715
				if (NEED_RERUN) {
1716
					to_be_check_rois = to_be_check_again
1717
					break
1718
				}
1719
			}
1720
		}
1721
		
1722
		checked_rois = to_be_check_rois
1723
		for (checked_roi_index in 1:length(checked_rois[,1])) {
1724
			rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])	
1725
		}
1726
	}
1727

    
1728
	print(length(rois_3rd_round[,1]))
1729
	print(sum(rois_3rd_round$length))
1730
	rois = rois_3rd_round
1731
	
1732
	
1733
	if (plot) {
1734
		print(paste(length(rois$chr), "area of interest."))
1735
	  # Plot rois
1736
	  genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[["BY"]], "fasta")   
1737
		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1738
		for (name in names(genome)) {
1739
			if (TRUE) {
1740
				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
1741
			} else {			
1742
				chr_ref = name
1743
			}
1744
			y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
1745
			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1746
			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1747
		}
1748
	  col=1
1749
	  for (roi_index in 1:length(rois$chr)) {
1750
			roi = rois[roi_index,]	
1751
			y_lev = as.integer(roi$chr) + 0.3
1752
			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1753
			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1754
	  	col = col + 1
1755
	  }	
1756
	}
1757
	return (rois)
1758
}
1759

    
1760

    
1761

    
1762

    
1763
build_replicates = structure(function(# Stage replicates data 
1764
### This function loads in memory data corresponding to the given experiments.
1765
expe, ##<< a list of vector corresponding to vector of replicates.
1766
roi, ##<< the region that we are interested in.
1767
only_fetch=FALSE, ##<< filter or not inputs.
1768
get_genome=FALSE,##<< Load or not corresponding genome.
1769
all_samples, ##<< Global list of samples.
1770
config=NULL ##<< GLOBAL config variable.
1771
) {
1772
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1773
  	samples=list()
1774
  	for (i in samples_ids) {
1775
  		sample = as.list(all_samples[all_samples$id==i,])
1776
      sample$orig_roi = roi
1777
      sample$roi = translate_roi(roi, sample$strain, config = config)
1778
  		if (get_genome) {
1779
  			# Get Genome
1780
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1781
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
1782
  		}
1783
  		# Get inputs
1784
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1785
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE) 
1786
  		sample$total_reads = sum(sample$inputs[,4]) 	
1787
  		if (!only_fetch) {
1788
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1789
  	  }
1790
  	  # Get TF outputs for Mnase_Seq samples
1791
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {	
1792
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1793
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1794
  			if (!only_fetch) {
1795
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1796
    		}
1797
  		}
1798
  		samples[[length(samples) + 1]] = sample		
1799
  	}
1800
  	return(samples)
1801
  }
1802
	replicates = list()
1803
	for(samples_ids in expe) {
1804
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1805
		replicates[[length(replicates) + 1]] = samples
1806
	}
1807
	return(replicates)
1808
  }, ex = function() {
1809
    # library(rjson)
1810
    # library(nucleominer)
1811
    # 
1812
    # # Read config file
1813
    # json_conf_file = "nucleo_miner_config.json"
1814
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1815
    # # Read sample file
1816
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)  
1817
    # # here are the sample ids in a list
1818
    # expes = list(c(1))
1819
    # # here is the region that we wnt to see the coverage
1820
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY") 
1821
    # # it displays the corverage
1822
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1823
    # out = watch_samples(replicates, config$READ_LENGTH, 
1824
    #       plot_coverage = TRUE,  
1825
    #       plot_squared_reads = FALSE,  
1826
    #       plot_ref_genome = FALSE, 
1827
    #       plot_arrow_raw_reads = FALSE,  
1828
    #       plot_arrow_nuc_reads = FALSE,  
1829
    #       plot_gaussian_reads = FALSE,  
1830
    #       plot_gaussian_unified_reads = FALSE,  
1831
    #       plot_ellipse_nucs = FALSE,  
1832
    #       plot_wp_nucs = FALSE,  
1833
    #       plot_wp_nuc_model = FALSE,  
1834
    #       plot_common_nucs = FALSE,  
1835
    #       height = 50)
1836
  })
1837

    
1838

    
1839

    
1840

    
1841

    
1842

    
1843

    
1844

    
1845

    
1846

    
1847

    
1848

    
1849

    
1850

    
1851

    
1852

    
1853

    
1854

    
1855

    
1856

    
1857

    
1858

    
1859

    
1860

    
1861

    
1862

    
1863

    
1864

    
1865

    
1866

    
1867

    
1868

    
1869

    
1870

    
1871

    
1872

    
1873

    
1874

    
1875

    
1876

    
1877

    
1878

    
1879

    
1880

    
1881

    
1882

    
1883

    
1884

    
1885

    
1886

    
1887

    
1888

    
1889

    
1890

    
1891

    
1892

    
1893

    
1894

    
1895

    
1896

    
1897

    
1898

    
1899

    
1900

    
1901

    
1902

    
1903

    
1904

    
1905

    
1906

    
1907

    
1908

    
1909

    
1910

    
1911

    
1912

    
1913

    
1914

    
1915

    
1916

    
1917

    
1918

    
1919

    
1920

    
1921

    
1922

    
1923

    
1924

    
1925

    
1926

    
1927

    
1928

    
1929

    
1930

    
1931

    
1932

    
1933

    
1934

    
1935

    
1936

    
1937

    
1938
perform_anovas = function(# Performaing ANOVAs
1939
### Counts reads and Performs ANOVAS for each common nucleosomes involved.
1940
replicates, ##<< Set of replicates, each replicate is a list of samples (ideally 3). Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}. In the \emph{perform_anovas} contexte, we need 4 replicates (4 * (3 samples)): 2 strains * (1 marker + 1 input (Mnase_Seq)). 
1941
aligned_inter_strain_nucs, ##<< List of common nucleosomes.
1942
inputs_name="Mnase_Seq", ##<< Name of the input.
1943
plot_anova_boxes=FALSE ##<< Plot (or not) boxplot for each nuc.
1944
) {
1945
	anova_results = NULL
1946
	for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
1947
		inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
1948
	  # counting reads
1949
		my_data = NULL
1950
		for (replicate_rank in 1:length(replicates)) {
1951
			samples = replicates[[replicate_rank]]
1952
			strain = samples[[1]]$strain
1953
			marker = samples[[1]]$marker
1954
			for (sample_rank in 1:length(samples)) {
1955
				sample = samples[[sample_rank]]
1956
				nuc_width = as.integer(inter_strain_nuc[[paste("upper_bound_",strain,sep="")]]) - inter_strain_nuc[[paste("lower_bound_",strain,sep="")]]
1957
				nuc_reads = filter_tf_inputs(sample$inputs, inter_strain_nuc[[paste("chr_",strain,sep="")]], inter_strain_nuc[[paste("lower_bound_",strain,sep="")]], inter_strain_nuc[[paste("upper_bound_",strain,sep="")]], nuc_width - 1)
1958
				indic = sum(nuc_reads[,4]) * 1000000/sample$total_reads / nuc_width 
1959
				my_data = dfadd(my_data, list(strain = strain, marker = marker, indic = indic))
1960
			}
1961
		}
1962
		my_data$strain = as.factor(my_data$strain)
1963
		my_data$marker = as.factor(my_data$marker)
1964
	
1965
		strain_ref1 = replicates[[1]][[1]]$strain
1966
		strain_ref2 = replicates[[2]][[1]]$strain 
1967
		marker = replicates[[length(replicates)]][[1]]$marker				
1968

    
1969
	  # Collecting anova results
1970
	  anova_result = list()
1971
		# nucs info
1972
		for (name in names(inter_strain_nuc)) {
1973
			anova_result[[name]] = inter_strain_nuc[[name]]
1974
		}
1975
	  #
1976
		for (strain in c(strain_ref1, strain_ref2)) {
1977
			for (manip in c(inputs_name, marker)) {
1978
				replicat = 1
1979
				for(indic in my_data[my_data$strain == strain & my_data$marker == manip,]$indic) {
1980
					anova_result[[paste(strain, manip, replicat, sep="_")]] = indic
1981
					replicat = replicat + 1
1982
				}			
1983
			}	
1984
		}
1985

    
1986
	  # Compute ANOVAs
1987
		mnase_data = my_data[my_data$marker == inputs_name,]
1988
		mnase_aov = aov(indic~strain,mnase_data)
1989
		mnase_effects = model.tables(mnase_aov)$tables
1990
		mnase_pvalues = summary(mnase_aov)[[1]]$Pr
1991
		# boxplot(indic~ma*st,mnase_data)
1992
		anova_result[["mnase_st"]] = mnase_effects$strain[1]
1993
		anova_result[["mnase_st_pvalue"]] = mnase_pvalues[1]	
1994
		# 
1995
		marker_data = my_data[my_data$marker == marker,]
1996
		marker_aov = aov(indic~strain,marker_data)
1997
		marker_effects = model.tables(marker_aov)$tables
1998
		marker_pvalues = summary(marker_aov)[[1]]$Pr
1999
		# boxplot(indic~ma*st,marker_data)
2000
		anova_result[["marker_st"]] = marker_effects$strain[1]
2001
		anova_result[["marker_st_pvalue"]] = marker_pvalues[1]
2002
		# 
2003
		st1_data = my_data[my_data$strain == strain_ref1,]
2004
		st1_aov = aov(indic~marker,st1_data)
2005
		st1_effects = model.tables(st1_aov)$tables
2006
		st1_pvalues = summary(st1_aov)[[1]]$Pr
2007
		# boxplot(indic~ma*st,st1_data)
2008
		anova_result[["st1_ma"]]=st1_effects$ma[inputs_name]
2009
		anova_result[["st1_ma_pvalue"]]=st1_pvalues[1]
2010
		# 
2011
		st2_data = my_data[my_data$strain == strain_ref2,]
2012
		st2_aov = aov(indic~marker,st2_data)
2013
		st2_effects = model.tables(st2_aov)$tables
2014
		st2_pvalues = summary(st2_aov)[[1]]$Pr
2015
		# boxplot(indic~ma*st,st2_data)
2016
		anova_result[["st2_ma"]]=st2_effects$ma[inputs_name]
2017
		anova_result[["st2_ma_pvalue"]]=st2_pvalues[1]
2018
		# 
2019
		correl_data = my_data
2020
		correl_aov = aov(indic~strain*marker,correl_data)
2021
		correl_effects = model.tables(correl_aov)$tables
2022
		correl_pvalues = summary(correl_aov)[[1]]$Pr
2023
		if (plot_anova_boxes) {
2024
			x11()
2025
			boxplot(indic~marker*strain,correl_data)
2026
		}
2027
		anova_result[["correl_st"]]=correl_effects$strain[1]
2028
		anova_result[["correl_ma"]]=correl_effects$ma[inputs_name]
2029
		anova_result[["correl_st_ma"]]=correl_effects$"strain:marker"[1,1]
2030
		anova_result[["correl_st_pvalue"]]=correl_pvalues[1]
2031
		anova_result[["correl_ma_pvalue"]]=correl_pvalues[2]
2032
		anova_result[["correl_st_ma_pvalue"]]=correl_pvalues[3]
2033
		
2034
		anova_results = dfadd(anova_results, anova_result)
2035
	}
2036
	return(anova_results)
2037
### Returns ANOVA results and comunted reads.
2038
}
2039

    
2040

    
2041
watch_samples = function(# Watching analysis of samples 
2042
### This function allows to view analysis for a particuler region of the genome.
2043
replicates, ##<< replicates under the form...
2044
read_length, ##<< length of the reads
2045
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
2046
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
2047
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
2048
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
2049
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
2050
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
2051
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
2052
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
2053
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
2054
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
2055
plot_common_nucs = TRUE,  ##<< Plot (or not) aligned reads.
2056
plot_anovas = FALSE,  ##<< Plot (or not) scatter for each nuc.
2057
plot_anova_boxes =  FALSE,  ##<< Plot (or not) boxplot for each nuc.
2058
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
2059
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. 
2060
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
2061
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
2062
config=NULL ##<< GLOBAL config variable
2063
){
2064
  returned_list = list()
2065
  # Computing global display parameters
2066
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
2067
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
2068
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
2069
  } else {
2070
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
2071
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
2072
  }
2073
	base_glo = 0
2074
	nb_rank_glo = 0
2075
  for (samples in replicates) {
2076
  	nb_rank_glo = nb_rank_glo + length(samples)
2077
  }
2078
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
2079
	y_min_glo = min(ylim_glo)
2080
	y_max_glo = max(ylim_glo)
2081
  delta_y_glo = y_max_glo - y_min_glo
2082
  # Plot main frame
2083
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab="#reads (per million reads)", xlab=paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr), main="NucleoMiner2" )
2084
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
2085
  # Go
2086
	replicates_wp_nucs = list()
2087
  for (replicate_rank in 1:length(replicates)) {
2088
		# Computing replicate parameters
2089
		nb_rank = length(samples)
2090
		base = (replicate_rank-1) * height * nb_rank
2091
		ylim = c(base, base + height * nb_rank)
2092
		y_min = min(ylim)
2093
		y_max = max(ylim)
2094
	  delta_y = y_max - y_min
2095
		samples = replicates[[replicate_rank]]
2096
		for (sample_rank in 1:length(samples)) {
2097
			# computing sample parameters
2098
			sample = samples[[sample_rank]]
2099
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
2100
			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
2101

    
2102
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2103
			  x_min = sample$roi[["begin"]]
2104
			  x_max = sample$roi[["end"]]
2105
		  } else {
2106
			  x_min = - sample$roi[["begin"]]
2107
			  x_max = - sample$roi[["end"]]
2108
		  }		
2109
			shift = x_min_glo - x_min
2110
	    # Plot Genome seq
2111
			if (plot_ref_genome) {
2112
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
2113
		  }
2114
			# Plot reads
2115
			reads = sample$inputs
2116
			signs = sign_from_strand(reads[,3])	
2117
			if (plot_arrow_raw_reads) {
2118
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, 
2119
				col=1, length=0.15/nb_rank)          	
2120
			}
2121
	    if (plot_squared_reads) {    	
2122
        # require(plotrix)
2123
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
2124
			}
2125
	    if (plot_coverage) {    	
2126
        if (length(reads[,1]) != 0) {
2127
          step_h = sign(x_min) * signs * reads[,4]
2128
          step_b = sign(x_min) * reads[,2] + shift
2129
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
2130
          steps_x = min(step_b, step_e):max(step_b, step_e)          
2131
          steps_y = rep(0, length(steps_x))
2132
          for (i in 1:length(step_h)) {          
2133
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
2134
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
2135
          }
2136
          tmp_index = which(steps_y != 0)
2137
          steps_x = steps_x[tmp_index]
2138
          steps_y = steps_y[tmp_index]
2139
          tmp_current_level = 0
2140
          for (i in 1:length(steps_y)) {          
2141
            steps_y[i] = tmp_current_level + steps_y[i]
2142
            tmp_current_level = steps_y[i]
2143
          }
2144
          steps_y = c(0, steps_y)
2145
          steps_y = steps_y * 1000000/sample$total_reads
2146
        } else {
2147
          steps_y = c(0, 0, 0)
2148
          steps_x = c(sample$roi$begin, sample$roi$end)
2149
        }
2150
        print(steps_x)
2151
        print(steps_y)
2152
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
2153
        abline(y_lev,0)
2154
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
2155
			}
2156
			# Plot nucs
2157
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
2158
				nucs = sample$outputs
2159
				if (length(nucs$center) > 0) {
2160
					col = 1
2161
		      for (i in 1:length(nucs$center)) {
2162
						col = col + 1
2163
		        nuc = nucs[i,]
2164
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
2165
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
2166
						total_involved_reads = sum(involved_reads[,4]) 
2167
						if (plot_arrow_nuc_reads ) {
2168
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, 
2169
							col=col, length=0.15/nb_rank)          	
2170
						}
2171
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
2172
  						flatted_reads = flat_reads(involved_reads, nuc$width)
2173
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2174
		  			}
2175
	          if (plot_gaussian_reads ) {
2176
							flatted_reads = flat_reads(involved_reads, nuc$width)
2177
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2178
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
2179
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
2180
	          }
2181
	          if (plot_gaussian_unified_reads ) {
2182
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=2)
2183
	          }
2184
	          if (plot_ellipse_nucs) {
2185
				      # require(plotrix)
2186
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col) 
2187
						}
2188
		      }
2189
		    } else {
2190
		      print("WARNING! No nucs to print.")
2191
		    }
2192
			}
2193
	  }
2194
	  # Plot wp nucs
2195
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs)) {
2196
			if (samples[[1]]$marker == "Mnase_Seq") {
2197
				if (is.null(aggregated_intra_strain_nucs)) {
2198
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]			
2199
				} else {
2200
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2201
				}
2202
		  } else {
2203
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2204
		  }
2205
			replicates_wp_nucs[[replicate_rank]] = wp_nucs
2206
			for (wp_nuc in wp_nucs) {
2207
				if (wp_nuc$wp){
2208
					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2209
					all_original_reads = c()
2210
					for(initial_nuc in wp_nuc$nucs) {
2211
						all_original_reads = c(all_original_reads, initial_nuc$original_reads)						
2212
					}
2213
					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2214
					if (FALSE) {
2215
					  rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
2216
				  }
2217
					if (plot_wp_nuc_model) {
2218
					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)				
2219
				  }
2220
				}
2221
			}
2222
		}
2223
	}	
2224
  
2225
	if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
2226
		if (is.null(aligned_inter_strain_nucs)) {
2227
			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2228
		}
2229
		if (plot_common_nucs) {
2230
      #Plot common wp nucs
2231
      mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2232
            for (replicate_rank in 1:length(replicates)) {
2233
        nb_rank[[replicate_rank]] = length(samples)
2234
        base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2235
        ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2236
        y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2237
        y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2238
        delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2239
        mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2240
      
2241
        samples = replicates[[replicate_rank]]
2242
        for (sample_rank in 1:length(samples)) {
2243
          sample = samples[[sample_rank]]
2244
          y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2245
          if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2246
            x_min[[replicate_rank]] = sample$roi[["begin"]]
2247
            x_max[[replicate_rank]] = sample$roi[["end"]]
2248
          } else {
2249
            x_min[[replicate_rank]] = - sample$roi[["begin"]]
2250
            x_max[[replicate_rank]] = - sample$roi[["end"]]
2251
          }
2252
          shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2253
        }
2254
      }  
2255
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2256
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2257
        tmp_xs = tmp_ys = c()
2258
        for (replicate_rank in 1:length(replicates)) {
2259
          samples = replicates[[replicate_rank]]
2260
          strain = samples[[1]]$strain
2261
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2262
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2263
        }
2264
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)          
2265
      }    
2266
		}
2267
		if (plot_anovas | plot_anova_boxes) {
2268
			anova_results = perform_anovas(replicates, aligned_inter_strain_nucs, plot_anova_boxes=plot_anova_boxes)
2269
			thres = FDR(anova_results[,"correl_st_ma_pvalue"],0.05)
2270
			if (is.na(thres)) {
2271
				# Boneferroni multiple test
2272
				thres = 0.05/length(anova_results[,1])
2273
			}
2274
			filtred_anova_results = anova_results[anova_results[,"correl_st_ma_pvalue"]<thres,]
2275
			x11()
2276
			if (plot_anovas) {
2277
			  plot(anova_results[,"mnase_st"],anova_results[,"correl_st_ma"], pch=1, xlim=c(-0.07,0.07), ylim=c(-0.07,0.07),main="SNEPs" ,xlab = "strain effect", ylab = "snep effect")
2278
	      points(x=filtred_anova_results[,"mnase_st"],y=filtred_anova_results[,"correl_st_ma"],col=2, pch="+")
2279
		  }
2280
 	  }
2281
	}
2282
  return(returned_list)
2283
}
2284

    
2285

    
2286

    
2287