Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 5badc2fd

Historique | Voir | Annoter | Télécharger (84,62 ko)

1
FDR = structure(function#  False Discovery Rate
2
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
3
##author<< Gael Yvert,
4
(
5
x, ##<< A vector x of independent p-values.
6
FDR ##<< The specified FDR.
7
) {
8
  x <- sort(na.omit(x))
9
  N = length(x)
10
  i = 1;
11
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
12
  if (i == 1)
13
    return (NA)
14
  else
15
    return( x[i-1] )
16
### Return the the corresponding cutoff.
17
}, ex=function(){
18
  print("example")
19
})
20

    
21

    
22

    
23
llr_score_nvecs = structure(function # Likelihood ratio
24
### Compute the log likelihood ratio of two or more set of value.
25
(
26
  xs ##<< list of vectors.
27
) {
28
  l = length(xs)
29
  if (l < 1) {
30
    return(NA)
31
  }
32
  if (l == 1) {
33
    return(1)
34
  }
35
  sumllX = 0
36
  for (i in 1:l) {
37
    x = xs[[i]]
38
  	if (length(x) <= 1) {
39
  		return(NA)
40
  	}
41
    meanX = mean(x)
42
    sdX = sd(x)
43
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
44
    sumllX = sumllX + llX
45
  }
46
  meanXglo = mean(unlist(xs))
47
  sdXglo = sd(unlist(xs))
48
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
49
  ratio = sumllX - llXYZ
50
  return(ratio)
51
### Returns the log likelihood ratio.
52
}, ex=function(){
53
  # LLR score for 2 set of values
54
  mean1=5; sd1=2; card2 = 250
55
  mean2=6; sd2=3; card1 = 200
56
  x1 = rnorm(card1, mean1, sd1)
57
  x2 = rnorm(card2, mean2, sd2)
58
  min = floor(min(c(x1,x2)))
59
  max = ceiling(max(c(x1,x2)))
60
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
61
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
62
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
63
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
64
  llr_score_nvecs(list(x1,x2))
65
 })
66

    
67
dfadd = structure(function# Adding list to a dataframe.
68
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
69
	(df, ##<<  A dataframe
70
		l ##<<  A list
71
	) {
72
  if (is.null(df)) {
73
    df = data.frame(l,stringsAsFactors=FALSE)
74
  } else {
75
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
76
  }
77
  return(df)
78
### Return the dataframe \emph{df}.
79
}, ex=function(){
80
		## Here dataframe is NULL
81
		print(df)
82
		df = NULL
83

    
84
		# Initialize df
85
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
86
		print(df)
87

    
88
		# Adding elements to df
89
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
90
		print(df)
91
})
92

    
93

    
94
sign_from_strand = function(
95
### Get the sign of strand
96
strands) {
97
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
98
### If strand in forward then returns 1 else returns -1
99
}
100

    
101
flat_reads = function(
102
### Extract reads coordinates from TempleteFilter input sequence
103
reads, ##<< TemplateFilter input reads
104
nuc_width ##<< Width used to shift F and R reads.
105
) {
106
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
107
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
108
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
109
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
110
### Returns a list of F reads, R reads and joint/shifted F and R reads.
111
}
112

    
113

    
114

    
115
filter_tf_outputs = function(# Filter TemplateFilter outputs
116
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold.
117
tf_outputs, ##<< TemplateFilter outputs.
118
chr, ##<< Chromosome observed, here chr is an integer.
119
x_min, ##<< Coordinate of the first bp observed.
120
x_max, ##<< Coordinate of the last bp observed.
121
nuc_width = 160, ##<< Nucleosome width.
122
ol_bp = 59, ##<< Overlap Threshold.
123
corr_thres = 0.5 ##<< Correlation threshold.
124
) {
125
  if (x_min < 0) {
126
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
127
	} else {
128
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
129
  }
130
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
131
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
132
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
133
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
134
  i = 1
135
  while (i <= length(tf_outputs[,1])) {
136
    lb = tf_outputs[i,]$lower_bound
137
    ub = tf_outputs[i,]$upper_bound
138
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
139
    i = i+1
140
  }
141
  return(tf_outputs)
142
### Returns filtered TemplateFilter Outputs
143
}
144

    
145

    
146

    
147
filter_tf_inputs = function(# Filter TemplateFilter inputs
148
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
149
inputs, ##<< TF inputs to be filtered.
150
chr, ##<< Chromosome observed, here chr is an integer.
151
x_min, ##<< Coordinate of the first bp observed.
152
x_max, ##<< Coordinate of the last bp observed.
153
nuc_width = 160, ##<< Nucleosome width.
154
only_f = FALSE, ##<< Filter only F reads.
155
only_r = FALSE, ##<< Filter only R reads.
156
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
157
) {
158
	if (only_f) {
159
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
160
	} else if (only_r) {
161
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
162
	} else {
163
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
164
	}
165
  if (!filter_for_coverage) {
166
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
167
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
168
  }
169
	return(inputs)
170
### Returns filtred inputs.
171
}
172

    
173

    
174

    
175
get_comp_strand = function(
176
### Compute the complementatry strand.
177
strand ##<< The original strand.
178
) {
179
	apply(t(strand),2, function(n){
180
	  if (n=="a") {return("t")}
181
		if (n=="t") {return("a")}
182
		if (n=="c") {return("g")}
183
		if (n=="g") {return("c")}
184
	})
185
### Returns the complementatry strand.
186
}
187

    
188

    
189
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
190
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
191
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
192
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
193
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
194
){
195
	end_of_tracks = function(tracks) {
196
		if (length(tracks) == 0) {
197
			return(TRUE)
198
		}
199
	  for (lower_bound in tracks) {
200
			if(!is.na(lower_bound)) {
201
	      if (lower_bound < coord_max) {
202
	        return(FALSE)
203
	      }
204
	  	}
205
	  }
206
	  return(TRUE)
207
	}
208
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
209
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
210
			new_cluster$wp = TRUE
211
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
212
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
213
		  	clusters[[length(clusters) + 1]] = new_cluster
214
				# print(new_cluster)
215
		  }
216
		} else {
217
			new_cluster$wp = FALSE
218
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
219
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
220
			  clusters[[length(clusters) + 1]] = new_cluster
221
			}
222
		}
223
		return(clusters)
224
	}
225
	strain = samples[[1]]$strain
226
	llr_scores = c()
227
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
228
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
229
  # compute clusters
230
  clusters = list()
231
  cluster_contents = list()
232
  # Init reader
233
  indexes = c()
234
  track_readers = c()
235
  current_nuc = NULL
236
	llr_score = llr_thres + 1
237
  # Read nucs from TF outputs
238
  tf_outs = list()
239
	i = 1
240
  for (sample in samples) {
241
		tf_outs[[i]] = sample$outputs
242
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
243
    indexes[i] = 1
244
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
245
      track_readers[i] = coord_max
246
	  } else {
247
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
248
		}
249
		i = i+1
250
  }
251
	# print(track_readers)
252
  new_cluster = NULL
253
  nb_nucs_in_cluster = 0
254
  nuc_from_track = c()
255
  for (i in 1:length(tf_outs)){
256
    nuc_from_track[i] = FALSE
257
  }
258
  # Start clustering
259
  while (!end_of_tracks(track_readers)) {
260
    new_center = min(track_readers)
261
		current_track = which(track_readers == new_center)[1]
262
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
263
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
264
		new_nuc$inputs = samples[[current_track]]$inputs
265
		new_nuc$chr = samples[[current_track]]$roi$chr
266
		new_nuc$track = current_track
267

    
268
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
269
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
270
		new_nuc$original_reads = flatted_reads[[3]]
271

    
272
    new_upper_bound = new_nuc$upper_bound
273

    
274
    if (!is.null(current_nuc)) {
275
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
276
			llr_scores = c(llr_scores,llr_score)
277
		}
278
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
279
		if (is.na(llr_score)) {
280
			llr_score = llr_thres + 1
281
		}
282
		# Store llr_score
283
		new_nuc$llr_score = llr_score
284
	  if (llr_score < llr_thres) {
285
      # aggregate to current cluster
286
      #   update bound
287
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
288
        new_cluster$upper_bound = new_nuc$upper_bound
289
      }
290
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
291
        new_cluster$lower_bound = new_nuc$lower_bound
292
      }
293
      #   add nucleosome to current cluster
294
      nuc_from_track[current_track] = TRUE
295
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
296
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
297
    } else {
298
			if (!is.null(new_cluster)) {
299
        # store old cluster
300
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
301
			}
302
      # Reinit current cluster composition stuff
303
      nb_nucs_in_cluster = 0
304
      nuc_from_track = c()
305
      for (i in 1:length(tf_outs)){
306
        nuc_from_track[i] = FALSE
307
      }
308
      # create new cluster
309
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
310
      # update upper bound
311
      current_upper_bound = new_upper_bound
312
      # add nucleosome to current cluster
313
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
314
      nuc_from_track[current_track] = TRUE
315
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
316

    
317
		}
318

    
319
		current_nuc = new_nuc
320

    
321
    # update indexes
322
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
323
      indexes[current_track] = indexes[current_track] + 1
324
      # update track
325
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
326
    } else {
327
      # update track
328
      track_readers[current_track] = coord_max
329
    }
330
  }
331
  # store last cluster
332
  if (!is.null(new_cluster)) {
333
    # store old cluster
334
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
335
  }
336
	return(list(clusters, llr_scores))
337
### Returns a list of clusterized nucleosomes, and all computed llr scores.
338
}, ex=function(){
339
	# Dealing with a region of interest
340
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
341
	samples = list()
342
	for (i in 1:3) {
343
		# Create TF output
344
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
345
		outputs = dfadd(NULL,tf_nuc)
346
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
347
		# Generate corresponding reads
348
		nb_reads = round(runif(1,170,230))
349
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
350
		u_reads = sort(unique(reads))
351
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
352
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
353
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
354
		u_reads = u_reads + shifts
355
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
356
		                         "V2" = u_reads,
357
														 "V3" = strands,
358
														 "V4" = counts), stringsAsFactors=FALSE)
359
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
360
	}
361
	print(aggregate_intra_strain_nucs(samples))
362
})
363

    
364
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
365
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
366
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
367
cur_index ##<< the index of the roi involved
368
) {
369
	if  (length(partial_strain_maps) == 0 ){
370
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
371
    tmp_strain_maps = list()
372
	} else {
373
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
374
			tmp_nuc = partial_strain_maps[[i]]
375
			tmp_nuc_as_list = list()
376
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
377
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
378
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
379
			tmp_nuc_as_list[["cur_index"]] = cur_index
380
			tmp_nuc_as_list[["index_nuc"]] = i
381
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
382
			all_original_reads = c()
383
			for (j in 1:length(tmp_nuc$nucs)) {
384
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
385
			}
386
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
387
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
388
			if (tmp_nuc$wp) {
389
				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
390
				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
391
			} else {
392
				tmp_nuc_as_list[["llr_1"]] = NA
393
				tmp_nuc_as_list[["llr_2"]] = NA
394
			}
395
      return(tmp_nuc_as_list)
396
    })
397
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
398
	}
399
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
400
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
401
}
402

    
403
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
404
### This function aligns nucleosomes between two strains for a given genome region.
405
replicates, ##<< Set of replicates, ideally 3 per strain.
406
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
407
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
408
corr_thres=0.5, ##<< Correlation threshold.
409
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
410
config=NULL, ##<< GLOBAL config variable
411
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
412
) {
413
	if (length(replicates) < 2) {
414
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
415
	} else if (length(replicates) > 2) {
416
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
417
	}
418
	common_nuc = NULL
419
	llr_scores = c()
420
	chr = replicates[[1]][[1]]$roi$chr
421
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
422
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
423
	strain_ref1 = replicates[[1]][[1]]$strain
424
	strain_ref2 = replicates[[2]][[1]]$strain
425
	big_cur = replicates[[1]][[1]]$roi
426
  orig_big_cur = replicates[[1]][[1]]$orig_roi
427
	if(big_cur$end - big_cur$begin < 0) {
428
		tmp_begin = big_cur$begin
429
		big_cur$begin =  big_cur$end
430
		big_cur$end =  tmp_begin
431
	}
432
	# GO!
433
	if (is.null(wp_nucs_strain_ref1)) {
434
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
435
	}
436
	if (is.null(wp_nucs_strain_ref2)) {
437
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
438
  }
439
	lws = c()
440
	ups = c()
441
	for (na in wp_nucs_strain_ref2) {
442
		lws = c(lws, na$lower_bound)
443
		ups = c(ups, na$upper_bound)
444
	}
445

    
446
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
447
	roi_strain_ref1 = NULL
448
	roi_strain_ref2 = NULL
449
	if (length(wp_nucs_strain_ref1) > 0) {
450
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
451
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
452
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
453
			# Filtering on Well Positionned
454
			if (nuc_strain_ref1$wp) {
455
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
456
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
457
        if (!is.null(roi_strain_ref2)){
458
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
459
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
460
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
461
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
462
					for (index_nuc_strain_ref2 in matching_nas) {
463
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
464
						# Filtering on Well Positionned
465
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
466
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
467
                nuc_strain_ref2$wp) {
468
							# Filtering on correlation Score and collecting reads
469
							SKIP = FALSE
470
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
471
							reads_strain_ref1 = c()
472
							for (nuc in nuc_strain_ref1$nucs){
473
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
474
								if (nuc$corr < corr_thres) {
475
									SKIP = TRUE
476
								}
477
							}
478
							reads_strain_ref2 = c()
479
							for (nuc in nuc_strain_ref2$nucs){
480
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
481
								if (nuc$corr < corr_thres) {
482
									SKIP = TRUE
483
								}
484
							}
485
							# Filtering on correlation Score
486
							if (!SKIP) {
487
								# tranlation of reads into strain 2 coords
488
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
489
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
490
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
491
								llr_scores = c(llr_scores, llr_score)
492
								# Filtering on LLR Score
493
                if (llr_score < llr_thres) {
494
									tmp_nuc = list()
495
									# strain_ref1
496
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
497
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
498
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
499
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
500
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
501
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
502
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
503
									# strain_ref2
504
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
505
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
506
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
507
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
508
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
509
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
510
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
511
									# common
512
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
513
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
514
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
515
									common_nuc = dfadd(common_nuc, tmp_nuc)
516
                }
517
							}
518
						}
519
					}
520
				} else {
521
		      print("WARNING! No roi for strain ref 2.")
522
			  }
523
		  }
524
		}
525

    
526
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
527
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
528
      to_remove_list = c()
529
      for (i in 1:length(index_redundant)) {
530
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
531
          to_remove = index_redundant[i]
532
        }   else {
533
          to_remove = index_redundant[i] + 1
534
        }
535
        to_remove_list = c(to_remove_list, to_remove)
536
      }
537
      common_nuc = common_nuc[-to_remove_list,]
538
    }
539

    
540
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
541
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
542
      to_remove_list = c()
543
      for (i in 1:length(index_redundant)) {
544
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
545
          to_remove = index_redundant[i]
546
        }   else {
547
          to_remove = index_redundant[i] + 1
548
        }
549
        to_remove_list = c(to_remove_list, to_remove)
550
      }
551
      common_nuc = common_nuc[-to_remove_list,]
552
    }
553

    
554
		return(list(common_nuc, llr_scores))
555
	} else {
556
		print("WARNING, no nucs for strain_ref1.")
557
		return(NULL)
558
	}
559
### Returns a list of clusterized nucleosomes, and all computed llr scores.
560
}, ex=function(){
561

    
562
    # Define new translate_cur function...
563
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
564
      return(roi)
565
    }
566
    # Binding it by uncomment follwing lines.
567
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
568
    unlockBinding("translate_cur", getNamespace("nucleominer"))
569
    assign("translate_cur", translate_cur, "package:nucleominer")
570
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
571
    lockBinding("translate_cur", getNamespace("nucleominer"))
572
    lockBinding("translate_cur", as.environment("package:nucleominer"))
573

    
574
	# Dealing with a region of interest
575
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
576
	roi2 = translate_cur(roi, roi$strain_ref1)
577
	replicates = list()
578
	for (j in 1:2) {
579
		samples = list()
580
		for (i in 1:3) {
581
			# Create TF output
582
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
583
			outputs = dfadd(NULL,tf_nuc)
584
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
585
			# Generate corresponding reads
586
			nb_reads = round(runif(1,170,230))
587
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
588
			u_reads = sort(unique(reads))
589
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
590
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
591
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
592
			u_reads = u_reads + shifts
593
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
594
			                         "V2" = u_reads,
595
															 "V3" = strands,
596
															 "V4" = counts), stringsAsFactors=FALSE)
597
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
598
		}
599
		replicates[[length(replicates) + 1]] = samples
600
	}
601
	print(align_inter_strain_nucs(replicates))
602
})
603

    
604

    
605

    
606

    
607

    
608

    
609

    
610

    
611

    
612

    
613

    
614

    
615

    
616

    
617

    
618

    
619

    
620

    
621

    
622

    
623

    
624

    
625

    
626

    
627

    
628

    
629
fetch_mnase_replicates = function(# Prefetch data
630
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
631
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
632
roi, ##<< Region of interest.
633
all_samples, ##<< Global list of samples.
634
config=NULL, ##<< GLOBAL config variable
635
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
636
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
637
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
638
) {
639
	samples=list()
640
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
641
	for (i in samples_ids) {
642
		sample = as.list(all_samples[all_samples$id==i,])
643
    sample$orig_roi = roi
644
    sample$roi = translate_cur(roi, sample$strain, config = config)
645
		if (get_genome) {
646
			# Get Genome
647
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
648
		}
649
		# Get inputs
650
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
651
		sample$total_reads = sum(sample$inputs[,4])
652
		if (!only_fetch) {
653
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
654
	  }
655
	  # Get TF outputs for Mnase_Seq samples
656
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
657
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
658
			if (!only_fetch) {
659
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
660
  		}
661
		}
662
		samples[[length(samples) + 1]] = sample
663
	}
664
  return(samples)
665
}
666

    
667
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
668
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
669
region1, ##<< Original regions.
670
region2 ##<< Regions to substract.
671
) {
672
  rec_substract_region = function(region1, region2) {
673
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
674
    cur_fuzzy = region1[i,]
675
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
676
    if (length(inter_wp[,1]) > 0) {
677
      ret = c()
678
      for (j in 1:length(inter_wp[,1])) {
679
        cur_wp = inter_wp[j,]
680
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
681
          # remove cur_fuzzy
682
          ret = c()
683
          break
684
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
685
          # crop fuzzy
686
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
687
          ret = cur_fuzzy
688
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
689
          # crop fuzzy
690
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
691
          ret = cur_fuzzy
692
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
693
          # split fuzzy
694
          tmp_ret_fuzzy_1 = cur_fuzzy
695
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
696
          tmp_ret_fuzzy_2 = cur_fuzzy
697
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
698
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
699
          # print(ret)
700
          # ret = cur_fuzzy
701
          break
702
        } else {
703
          stop("WARNING NO ADAPTED CASE!")
704
        }
705
      }
706
      return(ret)
707
    } else {
708
      return(cur_fuzzy)
709
    }
710
  })
711
  }
712
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
713
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
714
  tmp_ulist = unlist(non_inter_fuzzy)
715
  tmp_names = names(tmp_ulist)[1:4]
716
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
717
  names(non_inter_fuzzy) = tmp_names
718
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
719
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
720
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
721
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
722
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
723
  return(non_inter_fuzzy)
724
}
725

    
726
union_regions = function(# Aggregate regions that intersect themselves.
727
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
728
regions ##<< The Regions to be aggregated
729
) {
730
  if (is.null(regions)) {return(regions)}
731
  if (nrow(regions) == 0) {return(regions)}
732
  old_length = length(regions[,1])
733
  new_length = 0
734

    
735
  while (old_length != new_length) {
736
    regions = regions[order(regions$lower_bound), ]
737
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
738
    vec_end_1 = which(regions$stop)
739
    if (length(vec_end_1) == 0) {
740
      vec_end_1 = c(length(regions$stop))
741
    }
742
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
743
      vec_end_1 = c(vec_end_1, length(regions$stop))
744
    }
745
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
746
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
747
      chr = regions$chr[vec_beg_1[i]]
748
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
749
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
750
      cur_index = regions$cur_index[vec_beg_1[i]]
751
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
752
      })
753
    union = collapse_regions(union)
754
    old_length = length(regions[,1])
755
    new_length = length(union[,1])
756
    regions = union
757
  }
758
  return(union)
759
}
760

    
761
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
762
# ### It is based on common wp nucs index on nucs and region.
763
# strain_maps, ##<< Nuc maps.
764
# cur_index, ##<< The region of interest index.
765
# tmp_common_nucs, ##<< the list of wp nucs.
766
# strain##<< The strain to consider.
767
# ){
768
#   fuzzy_nucs = strain_maps[[strain]]
769
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
770
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
771
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
772
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
773
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
774
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
775
#   return(fuzzy_nucs)
776
# }
777

    
778
translate_regions = function(# Translate a list of regions from a strain ref to another.
779
### This function is an elaborated call to translate_cur.
780
regions, ##<< Regions to be translated.
781
combi, ##<< Combination of strains.
782
cur_index, ##<< The region of interest index.
783
config=NULL, ##<< GLOBAL config variable
784
roi ##<< The region of interest.
785
) {
786
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
787
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
788
    big_cur =  roi
789
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
790
    if (is.null(trs_tmp_regions_ref2)) {
791
      return(NULL)
792
    } else {
793
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
794
    }
795
  })
796

    
797
  return(collapse_regions(tr_regions))
798
}
799

    
800
collapse_regions = function(# reformat an "apply  manipulated" list of regions
801
### Utils to reformat an "apply  manipulated" list of regions
802
regions ##< a list of regions
803
) {
804
  if (is.null(regions)) {
805
    return(NULL)
806
  } else {
807
    regions = do.call(rbind, regions)
808
    regions$chr = as.character(regions$chr)
809
    regions$chr = as.numeric(regions$chr)
810
    regions$lower_bound = as.numeric(regions$lower_bound)
811
    regions$upper_bound = as.numeric(regions$upper_bound)
812
    regions = regions[order(regions$lower_bound),]
813
    return(regions)
814
  }
815
}
816

    
817

    
818
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
819
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
820
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
821
roi, ##<< The region of interest.
822
strain, ##<< The strain to consider.
823
config=NULL ##<< GLOBAL config variable
824
) {
825
  tr_roi = translate_cur(roi, strain, config = config)
826
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
827
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
828
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
829
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
830
  }
831
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
832
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
833
  }
834
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
835
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
836
  }
837
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
838
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
839
  }
840
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
841
  return(tmp_fuzzy_nucs)
842
}
843

    
844
get_all_reads = function(# Retrieve Reads
845
### Retrieve reads for a given marker, combi, form.
846
marker, ##<< The marker to considere.
847
combi, ##<< The starin combination to considere.
848
form="wp", ##<< The nuc form to considere.
849
config=NULL ##<< GLOBAL config variable
850
) {
851
	all_reads = NULL
852
  for (manip in c("Mnase_Seq", marker)) {
853
    if (form == "unr") {
854
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
855
  		tmp_res = read.table(file=out_filename, header=TRUE)
856
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
857
      tmp_res$form = form
858
    } else if (form == "wp") {
859
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
860
  		tmp_res = read.table(file=out_filename, header=TRUE)
861
      tmp_res$form = form
862
    } else if (form == "wpunr") {
863
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
864
  		tmp_res = read.table(file=out_filename, header=TRUE)
865
      tmp_res$form = "wp"
866
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
867
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
868
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
869
      tmp_res2$form = "unr"
870
      tmp_res = rbind(tmp_res, tmp_res2)
871
    }
872
		if (is.null(all_reads)) {
873
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
874
		}
875
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
876
		all_reads = cbind(all_reads, tmp_res)
877
  }
878
  return(all_reads)
879
}
880

    
881
get_design = function(# Build the design for DESeq
882
### This function build the design according sample properties.
883
marker, ##<< The marker to considere.
884
combi, ##<< The starin combination to considere.
885
all_samples ##<< Global list of samples.
886
) {
887
  off1 = 0
888
  off2 = 0
889
	manips = c("Mnase_Seq", marker)
890
	design_rownames = c()
891
	design_manip = c()
892
	design_strain = c()
893
  off2index = function(off) {
894
  	switch(toString(off),
895
  		"1"=c(0,1,1),
896
  	  "2"=c(1,0,1),
897
    	"3"=c(1,1,0),
898
  		c(1,1,1)
899
  		)
900
  }
901
	for (manip in manips) {
902
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
903
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
904
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
905
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
906
		} else {
907
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
908
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
909
			}
910
		}
911
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
912
		for (strain in combi) {
913
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
914
			design_strain = c(design_strain, rep(strain, length(cols)))
915
			design_rownames = c(design_rownames, cols)
916
		}
917
	}
918
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
919
	return(snep_design)
920
}
921

    
922
plot_dist_samples = function(# Plot the distribution of reads.
923
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
924
strain, ##<< The strain to considere.
925
marker, ##<< The marker to considere.
926
res, ##<< Data
927
all_samples, ##<< Global list of samples.
928
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
929
) {
930
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
931
	snepCountTable = res[,cols]
932
	snepDesign = data.frame(
933
		row.names = cols,
934
		manip = rep(marker, length(cols)),
935
		strain = rep(strain, length(cols))
936
		)
937
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
938
	sizeFactors = estimateSizeFactors(cdsFull)
939
	# print(sizeFactors[[1]])
940
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
941
	if (NEWPLOT) {
942
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
943
		NEWPLOT = FALSE
944
	}
945
	for (it in 1:length(sample_ids)) {
946
		sample_id = sample_ids[it]
947
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
948
	}
949
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
950
}
951

    
952
analyse_design = function(# Launch DESeq methods.
953
### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
954
snep_design, ##<< The design to consider.
955
reads ##<< The data to consider.
956
) {
957
	snep_count_table = reads[, rownames(snep_design)]
958
	cdsFull = newCountDataSet(snep_count_table, snep_design)
959
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
960
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
961

    
962
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
963
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
964

    
965
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
966
	return(list(fit1, fit0, snep_design, pvalsGLM))
967
}
968

    
969

    
970

    
971

    
972

    
973

    
974

    
975

    
976
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
977
### This function uses
978
marker, ##<< The marker involved.
979
combi, ##<< The strain combination involved.
980
form, ##<< the nuc form involved.
981
all_samples, ##<< Global list of samples.
982
FDR = 0.0001, ## the specific False Discover Rate
983
config=NULL ##<< GLOBAL config variable
984
) {
985
  # PRETREAT
986
  snep_design = get_design(marker, combi, all_samples)
987
  reads = get_all_reads(marker, combi, form, config=config)
988
  # RUN ANALYSE
989
  tmp_analyse = analyse_design(snep_design, reads)
990
  # RESULTS
991
	fit1 = tmp_analyse[[1]]
992
	fit0 = tmp_analyse[[2]]
993
  k = names(fit1)
994
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
995
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
996
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
997
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
998
	snep_design = tmp_analyse[[3]]
999
  # print(snep_design)
1000
	thres = FDR(reads$pvalsGLM, FDR)
1001
	reads$snep_index = reads$pvalsGLM < thres
1002
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
1003
  return(reads)
1004
  },  ex=function(){
1005
    marker = "H3K4me1"
1006
    combi = c("BY", "YJM")
1007
    form = "wpunr" # "wp" | "unr" | "wpunr"
1008
    # foo = get_sneps(marker, combi, form)
1009
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1010
})
1011

    
1012

    
1013

    
1014

    
1015

    
1016

    
1017

    
1018

    
1019

    
1020

    
1021

    
1022

    
1023

    
1024

    
1025

    
1026

    
1027

    
1028

    
1029

    
1030

    
1031

    
1032

    
1033

    
1034

    
1035
ROM2ARAB = function(# Roman to Arabic pair list.
1036
### Utility to convert Roman numbers into Arabic numbers
1037
){list(
1038
  "I" = 1,
1039
  "II" = 2,
1040
  "III" = 3,
1041
  "IV" = 4,
1042
  "V" = 5,
1043
  "VI" = 6,
1044
  "VII" = 7,
1045
  "VIII" = 8,
1046
  "IX" = 9,
1047
  "X" = 10,
1048
  "XI" = 11,
1049
  "XII" = 12,
1050
  "XIII" = 13,
1051
  "XIV" = 14,
1052
  "XV" = 15,
1053
  "XVI" = 16,
1054
  "XVII" = 17,
1055
  "XVIII" = 18,
1056
  "XIX" = 19,
1057
  "XX" = 20
1058
)}
1059

    
1060
switch_pairlist = structure(function(# Switch a pairlist
1061
### Take a pairlist key:value and return the switched pairlist value:key.
1062
l ##<< The pairlist to switch.
1063
) {
1064
	ret = list()
1065
	for (name in names(l)) {
1066
		ret[[as.character(l[[name]])]] = name
1067
	}
1068
	ret
1069
### The switched pairlist.
1070
}, ex=function(){
1071
	l = list(key1 = "value1", key2 = "value2")
1072
	print(switch_pairlist(l))
1073
})
1074

    
1075
ARAB2ROM = function(# Arabic to Roman pair list.
1076
### Utility to convert Arabic numbers to Roman numbers
1077
){switch_pairlist(ROM2ARAB())}
1078

    
1079

    
1080

    
1081

    
1082
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1083
### This fonction allows to access to a specific part of the c2c file.
1084
strain1, ##<< the key strain
1085
strain2, ##<< the target strain
1086
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1087
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
1088
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
1089
config=NULL##<<  GLOBAL config variable
1090
) {
1091
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1092
	# Launch c2c file
1093
	if (reverse) {
1094
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1095
	} else {
1096
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1097
	}
1098
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1099
  # Filtering unagapped
1100
  c2c = c2c[c2c$V6=="-",]
1101
	# Reverse
1102
	if (reverse) {
1103
		tmp_col = c2c$V1
1104
		c2c$V1 = c2c$V7
1105
		c2c$V7 = tmp_col
1106
		tmp_col = c2c$V2
1107
		c2c$V2 = c2c$V9
1108
		c2c$V9 = tmp_col
1109
		tmp_col = c2c$V3
1110
		c2c$V3 = c2c$V10
1111
		c2c$V10 = tmp_col
1112
	}
1113
  if (!is.null(chr)) {
1114
  	if (strain1 == "BY") {
1115
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1116
  	} else if (strain1 == "RM") {
1117
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1118
  	} else if (strain1 == "YJM") {
1119
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1120
  	}
1121
  	c2c = c2c[c2c$V1 == chro_1,]
1122
    if (!is.null(lower_bound)) {
1123
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1124
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1125
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1126
    }
1127
    if (!is.null(upper_bound)) {
1128
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1129
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1130
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1131
    }
1132
  }
1133
  return(c2c)
1134
# It retruns the appropriate c2c file part.
1135
}
1136

    
1137
translate_cur = structure(function(# Translate coords of a genome region.
1138
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1139
roi, ##<< Original genome region of interest.
1140
strain2, ##<< The strain in wich you want the genome region of interest.
1141
config=NULL, ##<< GLOBAL config variable
1142
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1143
) {
1144
	strain1 = roi$strain_ref
1145
	if (strain1 == strain2) {
1146
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1147
		return(roi)
1148
	}
1149

    
1150
	# Extract c2c file
1151
	if (!is.null(big_cur)) {
1152
  	# Dealing with big_cur
1153
		if (roi$strain_ref != big_cur$strain_ref) {
1154
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1155
    }
1156
    if (big_cur$end < big_cur$begin) {
1157
      tmp_var = big_cur$begin
1158
      big_cur$begin = big_cur$end
1159
      big_cur$end = tmp_var
1160
      big_cur$length = big_cur$end - big_cur$begin + 1
1161
    }
1162
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1163
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1164
      return(NULL)
1165
    }
1166
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1167
  } else {
1168
    # No big_cur
1169
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1170
  }
1171
  
1172
  #	Convert initial roi$chr into c2c format
1173
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1174
	begin_1 = roi$begin
1175
  end_1 = roi$end
1176
  if (reverse) {
1177
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1178
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1179
	} else {
1180
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1181
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1182
	}
1183

    
1184
	# Never happend conditions ...
1185
	{
1186
		if (length(tmptransfostart$V8) == 0) {
1187
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1188
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1189
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1190
  			begin_1 = min(tmp_c2c$V2)
1191
      } else {
1192
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1193
  			begin_1 = min(tmp_c2c$V3)
1194
      }
1195
			if (reverse) {
1196
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1197
			} else {
1198
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1199
			}
1200
			if (length(tmptransfostart$V8) == 0) {
1201
				if (!is.null(big_cur)) {
1202
					return(NULL)
1203
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1204
				} else {
1205
					print(tmptransfostart)
1206
					print(tmptransfostop)
1207
					stop("Never happend condition 1.")
1208
				}
1209
			}
1210
		}
1211
		if (length(tmptransfostop$V8) == 0) {
1212
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1213
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1214
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1215
  			end_1 = max(tmp_c2c$V3)
1216
      } else {
1217
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1218
  			end_1 = max(tmp_c2c$V2)
1219
      }
1220
			if (reverse) {
1221
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1222
			} else {
1223
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1224
			}
1225
			if (length(tmptransfostop$V8) == 0) {
1226
				if (!is.null(big_cur)) {
1227
					return(NULL)
1228
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1229
				} else {
1230
					print(tmptransfostart)
1231
					print(tmptransfostop)
1232
					stop("Never happend condition 2.")
1233
				}
1234
			}
1235
		}
1236
		if (length(tmptransfostart$V8) != 1) {
1237
			# print("many start")
1238
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1239
			if (length(tmptransfostart$V8) != 1) {
1240
				print(tmptransfostart)
1241
				print(tmptransfostop)
1242
  			stop("Never happend condition 3.")
1243
			}
1244
		}
1245
		if (length(tmptransfostop$V8) != 1) {
1246
			# print("many stop")
1247
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1248
			if (length(tmptransfostop$V8) != 1) {
1249
				print(tmptransfostart)
1250
				print(tmptransfostop)
1251
  			stop("Never happend condition 4.")
1252
			}
1253
		}
1254
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1255
			print(tmptransfostart)
1256
			print(tmptransfostop)
1257
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1258
		}
1259
	}
1260
  
1261
  # Deal with strand
1262
  if (tmptransfostart$V8 == 1) {
1263
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1264
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1265
  } else {
1266
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1267
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1268
  }
1269
	# Build returned roi
1270
	roi$strain_ref = strain2
1271
	if (roi$strain_ref == "BY") {
1272
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1273
	} else {
1274
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1275
	}
1276
  roi$begin = begin_2
1277
  roi$end = end_2
1278
	if (sign(roi$end - roi$begin) == 0) {
1279
		roi$length = 1
1280
	} else {
1281
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1282
	}
1283
  return(roi)
1284
}, ex=function(){
1285
	# Define new translate_cur function...
1286
	translate_cur = function(roi, strain2, config) {
1287
		strain1 = roi$strain_ref
1288
		if (strain1 == strain2) {
1289
			return(roi)
1290
		} else {
1291
		  stop("Here is my new translate_cur function...")
1292
		}
1293
	}
1294
	# Binding it by uncomment follwing lines.
1295
	# unlockBinding("translate_cur", as.environment("package:nm"))
1296
	# unlockBinding("translate_cur", getNamespace("nm"))
1297
	# assign("translate_cur", translate_cur, "package:nm")
1298
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1299
	# lockBinding("translate_cur", getNamespace("nm"))
1300
	# lockBinding("translate_cur", as.environment("package:nm"))
1301
})
1302

    
1303

    
1304
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1305
### CURs are regions that can be aligned between the genomes
1306
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1307
min_cur_width = 4000, ##<< The minimum width of a CUR
1308
config = NULL ##<< GLOBAL config variable
1309
) {
1310

    
1311
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1312
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1313
    check_homogeneity(c2c)
1314
  	if (length(c2c[,1]) == 0 ) {
1315
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1316
    } else {
1317
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1318
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1319
  		tmp_index = order(lower_bounds)
1320
      lower_bounds = lower_bounds[tmp_index]
1321
      upper_bounds = upper_bounds[tmp_index]
1322
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1323
      ov_index = which(tmp_diff < 0)
1324
      if(length(ov_index < 0) !=0 ) {
1325
        ov_index = ov_index[1]
1326
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1327
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1328
        print(c2c_corrupted)
1329
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1330
      }
1331
      return(NULL)
1332
    }
1333
  }
1334

    
1335
  check_homogeneity = function(sub_c2c) {
1336
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1337
    tmp_signs = tmp_signs[tmp_signs != 0]
1338
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1339
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1340
      print(tmp_signs)
1341
  	}
1342
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1343
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1344
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1345
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1346
      print(tmp_signs2)
1347
  	}
1348
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1349
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1350
  	}
1351
  }
1352

    
1353
  test_and_squeeze_rois = function(foo, config=NULL) {
1354
    is_it_ok = function(list1, list2) {
1355
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1356
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1357
      if (!ok) {
1358
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1359
      }
1360
      return (ok)
1361
    }
1362
    squeeze_rois = function(list1, list2) {
1363
      rois = apply(t(1:nrow(list1)), 2, function(i){
1364
        roi = list1[i,]
1365
        roi2 = list2[i,]
1366
        roi$begin = max(roi$begin, roi2$begin)
1367
        roi$end = min(roi$end, roi2$end)
1368
        roi$length =  roi$end - roi$begin + 1
1369
        return(roi)
1370
      })
1371
      return(do.call(rbind, rois))
1372
    }
1373
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1374
    # foo = foo_orig
1375
    STOP = FALSE
1376
    nb_round = 0
1377
    while(!STOP) {
1378
      nb_round = nb_round + 1
1379
      print(paste("2-2 round #", nb_round, sep=""))
1380
      fooby = translate_curs(foo, "BY", config=config)
1381
      fooyjm = translate_curs(foo, "YJM", config=config)
1382
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1383
      if (!is_it_ok(fooby, fooyjmby)) {
1384
        print("case 1")
1385
        foo = squeeze_rois(fooby, fooyjmby)
1386
    		next
1387
      }
1388
      foorm = translate_curs(foo, "RM", config=config)
1389
      foormby = translate_curs(foorm, "BY", config=config)
1390
      if (!is_it_ok(fooby, foormby)) {
1391
        print("case 2")
1392
        foo = squeeze_rois(fooby, foormby)
1393
    		next
1394
      }
1395
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1396
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1397
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1398
        print("case 3")
1399
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1400
        next
1401
      }
1402
      foormyjm = translate_curs(foorm, "YJM", config=config)
1403
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1404
      if (!is_it_ok(foorm, foormyjmrm)) {
1405
        print("case 4")
1406
        foo = squeeze_rois(foorm, foormyjmrm)
1407
        next
1408
      }
1409
      foo = translate_curs(foo, "BY", config=config)
1410
      STOP = TRUE
1411
    }
1412
    STOP = FALSE
1413
    nb_round = 0
1414
    while(!STOP) {
1415
      nb_round = nb_round + 1
1416
      print(paste("3-3 round #", nb_round, sep=""))
1417
      fooby = translate_curs(foo, "BY", config=config)
1418
      foobyrm = translate_curs(fooby, "RM", config=config)
1419
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1420
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1421
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1422
        print("case 1")
1423
        foo = squeeze_rois(fooby, foobyrmyjmby)
1424
      }
1425
      fooby = translate_curs(foo, "BY", config=config)
1426
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1427
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1428
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1429
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1430
        print("case 2")
1431
        foo = squeeze_rois(fooby, foobyyjmrmby)
1432
        next
1433
      }
1434
      foo = translate_curs(foo, "BY", config=config)
1435
      STOP = TRUE
1436
    }
1437
    print("end")
1438
    return(foo)
1439
  }
1440

    
1441
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1442
    c2c = c2c_extraction(strain1, strain2, config=config)
1443
    # computing diffs
1444
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1445
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1446
    # Filtering
1447
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1448
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1449
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1450
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1451
      returned_rois = NULL
1452
  		start = indexes_start[i]
1453
  		stop = indexes_stop[i]
1454
  		sub_c2c = c2c[start:stop,]
1455
      check_homogeneity(sub_c2c)
1456
  		if (strain1 == "BY") {
1457
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1458
  		} else {
1459
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1460
  		}
1461
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1462
  		roi$length = roi$end - roi$begin + 1
1463
  		if (roi$length >= min_cur_width) {
1464
  			lower_bound = roi$begin
1465
  			upper_bound = roi$end
1466
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1467
        while(!is.null(check)) {
1468
          # print(check)
1469
      		roi1 = roi
1470
          roi1$end = check[[1]]
1471
      		roi1$length = roi1$end - roi1$begin + 1
1472
      		if (roi1$length >= min_cur_width) {
1473
            returned_rois = dfadd(returned_rois,roi1)
1474
          }
1475
          roi$begin = check[[2]]
1476
      		roi$length = roi$end - roi$begin + 1
1477
      		if (roi$length >= min_cur_width) {
1478
      			lower_bound = min(roi$begin, roi$end)
1479
      			upper_bound = max(roi$begin, roi$end)
1480
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1481
          } else {
1482
            check = NULL
1483
            roi = NULL
1484
          }
1485
        }
1486
        returned_rois = dfadd(returned_rois,roi)
1487
  	  }
1488
    })
1489
    rois = do.call(rbind,rois)
1490
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1491
  	return(rois)
1492
  }
1493

    
1494
  translate_curs = function(rois, target_strain, config) {
1495
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1496
      roi = rois[i,]
1497
      tr_roi = translate_cur(roi, target_strain, config=config)  
1498
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1499
      tmp_end = max(tr_roi$begin, tr_roi$end)
1500
      tr_roi$begin = tmp_begin
1501
      tr_roi$end = tmp_end
1502
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1503
      return(tr_roi)
1504
    })
1505
    tr_rois = do.call(rbind, tr_rois)
1506
    return(tr_rois)
1507
  }
1508

    
1509
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1510
  rois = list()
1511
  for (combi in combis) {
1512
    strain1 = combi[1]
1513
    strain2 = combi[2]
1514
    print(paste(strain1, strain2))
1515
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1516
    strain1 = combi[2]
1517
    strain2 = combi[1]
1518
    print(paste(strain1, strain2))
1519
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1520
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1521
    region1 = rois_fwd
1522
    region2 = tr_rois_rev
1523
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1524
  }
1525
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1526
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1527
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1528
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1529
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1530
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1531
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1532
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1533
  return (squeezed_rois)
1534
}
1535

    
1536
intersect_region = function(# Returns the intersection of 2 list on regions.
1537
### This function...
1538
region1, ##<< Original regions.
1539
region2 ##<< Regions to intersect.
1540
) {
1541
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1542
    roi1 = region1[i, ]
1543
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1544
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1545
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1546
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1547
                                , ]
1548
    if (nrow(sub_regions2) == 0) {
1549
      print("removing a region")
1550
      return(NULL)
1551
    } else if (nrow(sub_regions2) > 1) {
1552
      print("more than one region in intersect_region")          
1553
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1554
    } else {
1555
      roi2 = sub_regions2[1,]
1556
      if (roi1$begin < roi2$begin) {
1557
        print("not the same begin")
1558
        roi1$begin = roi2$begin
1559
        roi1$length =  roi1$end - roi1$begin + 1
1560
      }
1561
      if (roi1$end > roi2$end) {
1562
        print("not the same end")
1563
        roi1$end = roi2$end
1564
        roi1$length =  roi1$end - roi1$begin + 1
1565
      }
1566
      return(roi1)
1567
    }         
1568
  })
1569
  return(do.call(rbind,intersection))
1570
}
1571

    
1572
build_replicates = structure(function(# Stage replicates data
1573
### This function loads in memory the data corresponding to the given experiments.
1574
expe, ##<< a list of vectors corresponding to replicates.
1575
roi, ##<< the region that we are interested in.
1576
only_fetch=FALSE, ##<< filter or not inputs.
1577
get_genome=FALSE,##<< Load or not corresponding genome.
1578
all_samples, ##<< Global list of samples.
1579
config=NULL ##<< GLOBAL config variable.
1580
) {
1581
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1582
  	samples=list()
1583
  	for (i in samples_ids) {
1584
  		sample = as.list(all_samples[all_samples$id==i,])
1585
      sample$orig_roi = roi
1586
      sample$roi = translate_cur(roi, sample$strain, config = config)
1587
  		if (get_genome) {
1588
  			# Get Genome
1589
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1590
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1591
  		}
1592
  		# Get inputs
1593
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1594
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1595
  		sample$total_reads = sum(sample$inputs[,4])
1596
  		if (!only_fetch) {
1597
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1598
  	  }
1599
  	  # Get TF outputs for Mnase_Seq samples
1600
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1601
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1602
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1603
  			if (!only_fetch) {
1604
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1605
    		}
1606
  		}
1607
  		samples[[length(samples) + 1]] = sample
1608
  	}
1609
  	return(samples)
1610
  }
1611
	replicates = list()
1612
	for(samples_ids in expe) {
1613
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1614
		replicates[[length(replicates) + 1]] = samples
1615
	}
1616
	return(replicates)
1617
  }, ex = function() {
1618
    # library(rjson)
1619
    # library(nucleominer)
1620
    #
1621
    # # Read config file
1622
    # json_conf_file = "nucleominer_config.json"
1623
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1624
    # # Read sample file
1625
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1626
    # # here are the sample ids in a list
1627
    # expes = list(c(1))
1628
    # # here is the region that we wnt to see the coverage
1629
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1630
    # # it displays the corverage
1631
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1632
    # out = watch_samples(replicates, config$READ_LENGTH,
1633
    #       plot_coverage = TRUE,
1634
    #       plot_squared_reads = FALSE,
1635
    #       plot_ref_genome = FALSE,
1636
    #       plot_arrow_raw_reads = FALSE,
1637
    #       plot_arrow_nuc_reads = FALSE,
1638
    #       plot_gaussian_reads = FALSE,
1639
    #       plot_gaussian_unified_reads = FALSE,
1640
    #       plot_ellipse_nucs = FALSE,
1641
    #       plot_wp_nucs = FALSE,
1642
    #       plot_wp_nuc_model = FALSE,
1643
    #       plot_common_nucs = FALSE,
1644
    #       height = 50)
1645
  })
1646

    
1647

    
1648

    
1649

    
1650

    
1651

    
1652

    
1653

    
1654

    
1655

    
1656

    
1657

    
1658

    
1659

    
1660

    
1661

    
1662

    
1663

    
1664

    
1665

    
1666

    
1667

    
1668

    
1669

    
1670

    
1671

    
1672

    
1673

    
1674

    
1675

    
1676

    
1677

    
1678

    
1679

    
1680

    
1681

    
1682

    
1683

    
1684

    
1685

    
1686

    
1687

    
1688

    
1689

    
1690

    
1691

    
1692

    
1693

    
1694

    
1695

    
1696

    
1697

    
1698

    
1699

    
1700

    
1701

    
1702

    
1703

    
1704

    
1705

    
1706

    
1707

    
1708

    
1709

    
1710

    
1711

    
1712

    
1713

    
1714

    
1715

    
1716

    
1717

    
1718

    
1719

    
1720

    
1721

    
1722

    
1723

    
1724

    
1725

    
1726

    
1727

    
1728

    
1729
watch_samples = function(# Watching analysis of samples
1730
### This function allows to view analysis for a particuler region of the genome.
1731
replicates, ##<< replicates under the form...
1732
read_length, ##<< length of the reads
1733
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1734
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1735
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1736
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1737
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1738
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1739
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1740
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1741
change_col = TRUE, ##<< Change the color of each nucleosome.
1742
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1743
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1744
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1745
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1746
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1747
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1748
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1749
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1750
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1751
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1752
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1753
main=NULL, ##<< main title of the produced plot
1754
xlab=NULL, ##<< xlab of the produced plot
1755
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1756
config=NULL ##<< GLOBAL config variable
1757
){
1758
  returned_list = list()
1759
  # Computing global display parameters
1760
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1761
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1762
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1763
  } else {
1764
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1765
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1766
  }
1767
	base_glo = 0
1768
	nb_rank_glo = 0
1769
  for (samples in replicates) {
1770
  	nb_rank_glo = nb_rank_glo + length(samples)
1771
  }
1772
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1773
	y_min_glo = min(ylim_glo)
1774
	y_max_glo = max(ylim_glo)
1775
  delta_y_glo = y_max_glo - y_min_glo
1776
  # Plot main frame
1777
  if (is.null(xlab)) {
1778
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1779
  }
1780
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1781
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1782
  # Go
1783
	replicates_wp_nucs = list()
1784
  wp_maps = list()
1785
  fuzzy_maps = list()
1786
  for (replicate_rank in 1:length(replicates)) {
1787
		# Computing replicate parameters
1788
		nb_rank = length(samples)
1789
		base = (replicate_rank-1) * height * nb_rank
1790
		ylim = c(base, base + height * nb_rank)
1791
		y_min = min(ylim)
1792
		y_max = max(ylim)
1793
	  delta_y = y_max - y_min
1794
		samples = replicates[[replicate_rank]]
1795
		for (sample_rank in 1:length(samples)) {
1796
			# computing sample parameters
1797
			sample = samples[[sample_rank]]
1798
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1799
      if (plot_sample_id) {
1800
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1801
      }
1802

    
1803
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1804
			  x_min = sample$roi[["begin"]]
1805
			  x_max = sample$roi[["end"]]
1806
		  } else {
1807
			  x_min = - sample$roi[["begin"]]
1808
			  x_max = - sample$roi[["end"]]
1809
		  }
1810
			shift = x_min_glo - x_min
1811
	    # Plot Genome seq
1812
			if (plot_ref_genome) {
1813
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1814
		  }
1815
			# Plot reads
1816
			reads = sample$inputs
1817
			signs = sign_from_strand(reads[,3])
1818
			if (plot_arrow_raw_reads) {
1819
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1820
				col=1, length=0.15/nb_rank)
1821
			}
1822
	    if (plot_squared_reads) {
1823
        # require(plotrix)
1824
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1825
			}
1826
	    if (plot_coverage) {
1827
        if (length(reads[,1]) != 0) {
1828
          step_h = sign(x_min) * signs * reads[,4]
1829
          step_b = sign(x_min) * reads[,2] + shift
1830
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1831
          steps_x = min(step_b, step_e):max(step_b, step_e)
1832
          steps_y = rep(0, length(steps_x))
1833
          for (i in 1:length(step_h)) {
1834
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1835
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1836
          }
1837
          tmp_index = which(steps_y != 0)
1838
          steps_x = steps_x[tmp_index]
1839
          steps_y = steps_y[tmp_index]
1840
          tmp_current_level = 0
1841
          for (i in 1:length(steps_y)) {
1842
            steps_y[i] = tmp_current_level + steps_y[i]
1843
            tmp_current_level = steps_y[i]
1844
          }
1845
          steps_y = c(0, steps_y)
1846
          steps_y = steps_y * 1000000/sample$total_reads
1847
        } else {
1848
          steps_y = c(0, 0, 0)
1849
          steps_x = c(x_min, x_max)
1850
        }
1851
        # print(steps_x)
1852
        # print(steps_y)
1853
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1854
        abline(y_lev,0)
1855
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1856
			}
1857
			# Plot nucs
1858
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1859
				nucs = sample$outputs
1860
				if (length(nucs$center) > 0) {
1861
					col = 1
1862
		      for (i in 1:length(nucs$center)) {
1863
            if (change_col) {
1864
  						col = col + 1
1865
              } else {
1866
                col = "blue"
1867
              }
1868
		        nuc = nucs[i,]
1869
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1870
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1871
						total_involved_reads = sum(involved_reads[,4])
1872
						if (plot_arrow_nuc_reads ) {
1873
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1874
							col=col, length=0.15/nb_rank)
1875
						}
1876
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1877
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1878
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1879
		  			}
1880
	          if (plot_gaussian_reads ) {
1881
							flatted_reads = flat_reads(involved_reads, nuc$width)
1882
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1883
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1884
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1885
	          }
1886
	          if (plot_gaussian_unified_reads ) {
1887
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1888
	          }
1889
	          if (plot_ellipse_nucs) {
1890
				      # require(plotrix)
1891
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1892
						}
1893
		      }
1894
		    } else {
1895
		      print("WARNING! No nucs to print.")
1896
		    }
1897
			}
1898
	  }
1899

    
1900
	  # Plot wp nucs
1901
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1902
			if (samples[[1]]$marker == "Mnase_Seq") {
1903
				if (is.null(aggregated_intra_strain_nucs)) {
1904
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1905
				} else {
1906
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1907
				}
1908
		  } else {
1909
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
1910
		  }
1911
      if (plot_chain) {
1912
        tf_nucs = lapply(wp_nucs, function(nuc) {
1913
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
1914
            tmp_nuc = tmp_nuc[[1]]
1915
            tmp_nuc$inputs = NULL
1916
            tmp_nuc$original_reads = NULL
1917
            tmp_nuc$wp = nuc$wp
1918
            # print(tmp_nuc)
1919
            return(tmp_nuc)
1920
          })
1921
          return(do.call(rbind, bar))
1922
        })
1923
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
1924
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
1925
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
1926
        tmp_y_prev = tmp_y[-length(tmp_y)]
1927
        tmp_y_next = tmp_y[-1]
1928
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
1929
        tmp_track = unlist(tf_nucs$track)
1930
        tmp_track_prev = tmp_track[-length(tmp_track)]
1931
        tmp_track_next = tmp_track[-1]
1932
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
1933
        if (is.null(config$TRACK_LLR_OFFSET)) {
1934
          config$TRACK_LLR_OFFSET = 0
1935
        }
1936
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
1937
        tmp_x_prev = tmp_x[-length(tmp_x)]
1938
        tmp_x_next = tmp_x[-1]
1939
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
1940
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
1941
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
1942
        new_tmp_x = c()
1943
        new_tmp_y = c()
1944
        index_odd = 1:length(tmp_x) * 2 - 1
1945
        index_even = (1:(length(tmp_x) - 1)) * 2
1946
        new_tmp_x[index_odd] = tmp_x
1947
        new_tmp_y[index_odd] = tmp_y
1948
        new_tmp_x[index_even] = tmp_x_inter
1949
        new_tmp_y[index_even] = tmp_y_inter
1950
        lines(new_tmp_x , new_tmp_y, lwd=2)
1951
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
1952
        points(tmp_x, tmp_y, cex=4, lwd=2)
1953
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
1954
        if (is.null(config$LEGEND_LLR_POS)) {
1955
          pos = 2
1956
        } else {
1957
          pos = config$LEGEND_LLR_POS
1958
        }
1959
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
1960
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
1961
        
1962
      }
1963

    
1964
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
1965
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
1966
        strain = samples[[1]]$strain
1967
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
1968
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
1969

    
1970
        if (plot_fuzzy_nucs) {
1971
          fuzzy_map = fuzzy_maps[[strain]]
1972
          if (!is.null(fuzzy_map)) {
1973
            if (nrow(fuzzy_map) > 0) {
1974
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
1975
            }
1976
          }
1977
        } 
1978

    
1979
        if (plot_wp_nucs) {
1980
    			for (wp_nuc in wp_nucs) {
1981
    				if (wp_nuc$wp){
1982
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
1983
    					if (plot_wp_nuc_model) {
1984
      					all_original_reads = c()
1985
      					for(initial_nuc in wp_nuc$nucs) {
1986
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
1987
      					}
1988
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
1989
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
1990
    				  }
1991
  				  }
1992
  				}
1993
  			}
1994
      }
1995
		}
1996
	}
1997

    
1998
	if (plot_common_nucs) {
1999
    if (is.null(aligned_inter_strain_nucs)) {
2000
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2001
      if (!is.null(aligned_inter_strain_nucs)) {
2002
        aligned_inter_strain_nucs$cur_index = "foo" 
2003
      }
2004
    }
2005

    
2006
    #Plot common wp nucs
2007
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2008
    for (replicate_rank in 1:length(replicates)) {
2009
      nb_rank[[replicate_rank]] = length(samples)
2010
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2011
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2012
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2013
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2014
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2015
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2016

    
2017
      samples = replicates[[replicate_rank]]
2018
      for (sample_rank in 1:length(samples)) {
2019
        sample = samples[[sample_rank]]
2020
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2021
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2022
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2023
          x_max[[replicate_rank]] = sample$roi[["end"]]
2024
        } else {
2025
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2026
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2027
        }
2028
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2029
      }
2030
    }
2031
    print(aligned_inter_strain_nucs)
2032
    if (!is.null(aligned_inter_strain_nucs)) {
2033
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2034
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2035
        tmp_xs = tmp_ys = c()
2036
        for (replicate_rank in 1:length(replicates)) {
2037
          samples = replicates[[replicate_rank]]
2038
          strain = samples[[1]]$strain
2039
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2040
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2041
        }
2042
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2043
      }
2044
    }
2045
    
2046
    if (plot_common_unrs) {
2047
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2048
      roi = as.list(samples[[1]]$roi)
2049
      cur_index = "foo"
2050
      common_nuc_results = list()
2051
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2052
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2053
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))        
2054
    }
2055

    
2056
	}
2057
  return(returned_list)
2058
}
2059

    
2060

    
2061

    
2062
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2063
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2064
wp_map, ##<< Well positionned nucleosomes map.
2065
roi, ##<< The region of interest.
2066
strain, ##<< The strain we want to extracvt the fuzzy map.
2067
config=NULL ##<< GLOBAL config variable.
2068
) {
2069
  fuzzy_map = wp_map[wp_map$wp==0, ]
2070
  if (nrow(fuzzy_map) > 0) {
2071
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2072
    if (!is.null(fuzzy_map)) {
2073
      fuzzy_map = union_regions(fuzzy_map)
2074
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2075
    }
2076
  }
2077
  return(fuzzy_map)
2078
}
2079

    
2080

    
2081

    
2082

    
2083

    
2084
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2085
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2086
combi, ##<< The strain combination to consider.
2087
roi, ##<< The region of interest.
2088
cur_index, ##<< The region of interest index.
2089
wp_maps, ##<< Well positionned nucleosomes maps.
2090
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2091
common_nuc_results, ##<< Common wp nuc maps
2092
config=NULL ##<< GLOBAL config variable
2093
) {
2094
  # print(cur_index)
2095

    
2096
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2097
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2098
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2099

    
2100
  # print(paste("Dealing with unr from", combi[1]))
2101
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2102
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2103
  tmp_wp = wp_maps[[combi[1]]]
2104
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2105
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2106
  # Let's go!
2107
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2108
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2109
      return(NULL)
2110
    } else {
2111
      return (index_nuc)
2112
    }
2113
  }))
2114
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2115
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2116
  if (length(tmp_unr) != 0) {
2117
    tmp_unr = union_regions(tmp_unr)
2118
  }
2119
  tmp_unr_nucs_1 = tmp_unr
2120
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2121
  agg_unr_1 = tmp_unr_nucs_1
2122

    
2123
  # print(paste("Dealing with unr from ", combi[2]))
2124
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2125
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2126
  tmp_wp = wp_maps[[combi[2]]]
2127
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2128
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2129
  # Let's go!
2130
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2131
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2132
      return(NULL)
2133
    } else {
2134
      return (index_nuc)
2135
    }
2136
  }))
2137
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2138
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2139
  if (length(tmp_unr) != 0) {
2140
    tmp_unr = union_regions(tmp_unr)
2141
  }
2142
  tmp_unr_nucs_2 = tmp_unr
2143
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2144
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2145
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2146
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2147

    
2148
  # print("Dealing with unr from both...")
2149
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2150

    
2151
  # print(paste("Dealing with wp from", combi[1]))
2152
  tmp_wp = wp_maps[[combi[1]]]
2153
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2154
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2155
  # Let's go!
2156
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2157
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2158
      return (index_nuc)
2159
    } else {
2160
      return(NULL)
2161
    }
2162
  }))
2163
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2164
  
2165
  # print(paste("Dealing with wp from", combi[2]))
2166
  tmp_wp = wp_maps[[combi[2]]]
2167
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2168
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2169
  # Let's go!
2170
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2171
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2172
      return (index_nuc)
2173
    } else {
2174
      return(NULL)
2175
    }
2176
  }))
2177
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2178
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2179
  if (nrow(wp_nucs_2) == 0) {
2180
    tr_wp_nucs_2 = wp_nucs_2
2181
  } else {
2182
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2183
  }
2184

    
2185
  # print("Dealing with wp from both...")
2186
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2187

    
2188
  # print("Dealing with unr and wp...")
2189
  non_inter_unr = substract_region(all_unr, all_wp)
2190
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2191
  if (is.null(non_inter_unr)) { return(NULL) }
2192
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2193
  min_unr_width = 75
2194
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2195

    
2196
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2197
  return (non_inter_unr)
2198
}
2199

    
2200

    
2201