Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 7e2d37e1

Historique | Voir | Annoter | Télécharger (87,37 ko)

1
# get_content = function(# Get content from cached
2
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3
# obj, ##<< The object that we want its content.
4
# ... ##<< Parameters that will be passed to my_read
5
# ) {
6
#   UseMethod("get_content", obj)
7
#   ### Returns the cached content of the current object.
8
# }
9
#
10
# get_content.default = structure( function(# Get content from cached
11
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12
# obj, ##<< The object that we want its content.
13
# ... ##<< Parameters that will be passed to my_read
14
# ) {
15
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16
#     NM_CACHE <<- list()
17
#   }
18
#   if (is.null(NM_CACHE[[obj$filename]])) {
19
#     print(paste("Loading file ",obj$filename, sep=""))
20
#     tmp_content = my_read(obj, ...)
21
#     print("affect it...")
22
#     NM_CACHE[[obj$filename]] <<- tmp_content
23
#     print("done.")
24
#   }
25
#   return(NM_CACHE[[obj$filename]])
26
# ### Returns the cached content of the current object.
27
# }, ex=function(){
28
#   # Create a dataframe
29
#   df = NULL
30
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32
#   # Dump it into tmp file
33
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34
#   # Load it into cache using feature.
35
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36
#   class(cached_content_object) = "table"
37
#   # First time it will be load into cache
38
#   print(get_content(cached_content_object))
39
#   # Second time not
40
#   print(get_content(cached_content_object))
41
#
42
# })
43
#
44
# my_read = function(
45
# ### Abstract my_read function.
46
#   obj, ...) {
47
#   UseMethod("my_read", obj)
48
# }
49
#
50
# my_read.default = function(
51
# ### Default my_read function.
52
#   obj, ...){
53
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
54
# }
55
#
56
# my_read.fasta = function(
57
# ### my_read function for fasta files.
58
#   obj, ...){
59
#   # require(seqinr)
60
#   return(read.fasta(obj$filename, ...))
61
# }
62
#
63
# my_read.table = function(
64
# ### my_read function for table files.
65
#   obj, ...){
66
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67
#     return(read.table(file=gzfile(obj$filename), ...))
68
#   } else {
69
#     return(read.table(file=obj$filename, ...))
70
#   }
71
# }
72
#
73
# my_read.cvs = function(
74
# ### my_read function for cvs files.
75
#   obj, ...){
76
#   return(read.csv(file=obj$filename, ...))
77
# }
78

    
79

    
80

    
81

    
82

    
83

    
84

    
85

    
86

    
87
FDR = structure(function#  False Discovery Rate
88
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89
##author<< Gael Yvert,
90
(
91
x, ##<< A vector x of independent p-values.
92
FDR ##<< The specified FDR.
93
) {
94
  x <- sort(na.omit(x))
95
  N = length(x)
96
  i = 1;
97
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98
  if (i == 1)
99
    return (NA)
100
  else
101
    return( x[i-1] )
102
### Return the the corresponding cutoff.
103
}, ex=function(){
104
  print("example")
105
})
106

    
107

    
108

    
109
llr_score_nvecs = structure(function # Likelihood ratio
110
### Compute the log likelihood ratio of two or more set of value.
111
(
112
  xs ##<< list of vectors.
113
) {
114
  l = length(xs)
115
  if (l < 1) {
116
    return(NA)
117
  }
118
  if (l == 1) {
119
    return(1)
120
  }
121
  sumllX = 0
122
  for (i in 1:l) {
123
    x = xs[[i]]
124
  	if (length(x) <= 1) {
125
  		return(NA)
126
  	}
127
    meanX = mean(x)
128
    sdX = sd(x)
129
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
130
    sumllX = sumllX + llX
131
  }
132
  meanXglo = mean(unlist(xs))
133
  sdXglo = sd(unlist(xs))
134
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
135
  ratio = sumllX - llXYZ
136
  return(ratio)
137
### Returns the log likelihood ratio.
138
}, ex=function(){
139
  # LOD score for 2 set of values
140
  mean1=5; sd1=2; card2 = 250
141
  mean2=6; sd2=3; card1 = 200
142
  x1 = rnorm(card1, mean1, sd1)
143
  x2 = rnorm(card2, mean2, sd2)
144
  min = floor(min(c(x1,x2)))
145
  max = ceiling(max(c(x1,x2)))
146
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
147
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
148
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
149
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
150
  llr_score_nvecs(list(x1,x2))
151
 })
152

    
153
dfadd = structure(function# Adding list to a dataframe.
154
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
155
	(df, ##<<  A dataframe
156
		l ##<<  A list
157
	) {
158
  if (is.null(df)) {
159
    df = data.frame(l,stringsAsFactors=FALSE)
160
  } else {
161
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
162
  }
163
  return(df)
164
### Return the dataframe \emph{df}.
165
}, ex=function(){
166
		## Here dataframe is NULL
167
		print(df)
168
		df = NULL
169

    
170
		# Initialize df
171
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
172
		print(df)
173

    
174
		# Adding elements to df
175
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
176
		print(df)
177
})
178

    
179

    
180
sign_from_strand = function(
181
### Get the sign of strand
182
strands) {
183
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
184
### If strand in forward then returns 1 else returns -1
185
}
186

    
187
flat_reads = function(
188
### Extract reads coordinates from TempleteFilter input sequence
189
reads, ##<< TemplateFilter input reads
190
nuc_width ##<< Width used to shift F and R reads.
191
) {
192
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
193
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
194
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
195
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
196
### Returns a list of F reads, R reads and joint/shifted F and R reads.
197
}
198

    
199

    
200

    
201
filter_tf_outputs = function(# Filter TemplateFilter outputs
202
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
203
tf_outputs, ##<< TemplateFilter outputs.
204
chr, ##<< Chromosome observed, here chr is an integer.
205
x_min, ##<< Coordinate of the first bp observed.
206
x_max, ##<< Coordinate of the last bp observed.
207
nuc_width = 160, ##<< Nucleosome width.
208
ol_bp = 59, ##<< Overlap Threshold.
209
corr_thres = 0.5 ##<< Correlation threshold.
210
) {
211
  if (x_min < 0) {
212
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
213
	} else {
214
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
215
  }
216
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
217
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
218
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
219
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
220
  i = 1
221
  while (i <= length(tf_outputs[,1])) {
222
    lb = tf_outputs[i,]$lower_bound
223
    ub = tf_outputs[i,]$upper_bound
224
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
225
    i = i+1
226
  }
227
  return(tf_outputs)
228
### Returns filtered TemplateFilter Outputs
229
}
230

    
231

    
232

    
233
filter_tf_inputs = function(# Filter TemplateFilter inputs
234
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
235
inputs, ##<< TF inputs to be filtered.
236
chr, ##<< Chromosome observed, here chr is an integer.
237
x_min, ##<< Coordinate of the first bp observed.
238
x_max, ##<< Coordinate of the last bp observed.
239
nuc_width = 160, ##<< Nucleosome width.
240
only_f = FALSE, ##<< Filter only F reads.
241
only_r = FALSE, ##<< Filter only R reads.
242
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
243
) {
244
	if (only_f) {
245
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
246
	} else if (only_r) {
247
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
248
	} else {
249
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
250
	}
251
  if (!filter_for_coverage) {
252
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
253
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
254
  }
255
	return(inputs)
256
### Returns filtred inputs.
257
}
258

    
259

    
260

    
261
get_comp_strand = function(
262
### Compute the complementatry strand.
263
strand ##<< The original strand.
264
) {
265
	apply(t(strand),2, function(n){
266
	  if (n=="a") {return("t")}
267
		if (n=="t") {return("a")}
268
		if (n=="c") {return("g")}
269
		if (n=="g") {return("c")}
270
	})
271
### Returns the complementatry strand.
272
}
273

    
274

    
275
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
276
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
277
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
278
llr_thres=20, ##<< Log likelihood ration threshold.
279
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
280
){
281
	end_of_tracks = function(tracks) {
282
		if (length(tracks) == 0) {
283
			return(TRUE)
284
		}
285
	  for (lower_bound in tracks) {
286
			if(!is.na(lower_bound)) {
287
	      if (lower_bound < coord_max) {
288
	        return(FALSE)
289
	      }
290
	  	}
291
	  }
292
	  return(TRUE)
293
	}
294
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
295
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
296
			new_cluster$wp = TRUE
297
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
298
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
299
		  	clusters[[length(clusters) + 1]] = new_cluster
300
				# print(new_cluster)
301
		  }
302
		} else {
303
			new_cluster$wp = FALSE
304
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
305
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
306
			  clusters[[length(clusters) + 1]] = new_cluster
307
			}
308
		}
309
		return(clusters)
310
	}
311
	strain = samples[[1]]$strain
312
	llr_scores = c()
313
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
314
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
315
  # compute clusters
316
  clusters = list()
317
  cluster_contents = list()
318
  # Init reader
319
  indexes = c()
320
  track_readers = c()
321
  current_nuc = NULL
322
	llr_score = llr_thres + 1
323
  # Read nucs from TF outputs
324
  tf_outs = list()
325
	i = 1
326
  for (sample in samples) {
327
		# print(sample$roi$chr)
328
		# print(min_nuc_center)
329
		# print(max_nuc_center)
330
		# print(sample$outputs)
331
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
332
		# print(tf_outs[[i]])
333
		tf_outs[[i]] = sample$outputs
334
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
335
    indexes[i] = 1
336
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
337
      track_readers[i] = coord_max
338
	  } else {
339
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
340
		}
341
		i = i+1
342
  }
343
	# print(track_readers)
344
  new_cluster = NULL
345
  nb_nucs_in_cluster = 0
346
  nuc_from_track = c()
347
  for (i in 1:length(tf_outs)){
348
    nuc_from_track[i] = FALSE
349
  }
350
  # Start clustering
351
  while (!end_of_tracks(track_readers)) {
352
    new_center = min(track_readers)
353
		current_track = which(track_readers == new_center)[1]
354
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
355
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
356
		new_nuc$inputs = samples[[current_track]]$inputs
357
		new_nuc$chr = samples[[current_track]]$roi$chr
358
		new_nuc$track = current_track
359

    
360
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
361
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
362
		new_nuc$original_reads = flatted_reads[[3]]
363

    
364
    new_upper_bound = new_nuc$upper_bound
365

    
366
    if (!is.null(current_nuc)) {
367
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
368
			llr_scores = c(llr_scores,llr_score)
369
		}
370
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
371
		if (is.na(llr_score)) {
372
			llr_score = llr_thres + 1
373
		}
374
		# Store llr_score
375
		new_nuc$llr_score = llr_score
376
	  if (llr_score < llr_thres) {
377
      # aggregate to current cluster
378
      #   update bound
379
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
380
        new_cluster$upper_bound = new_nuc$upper_bound
381
      }
382
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
383
        new_cluster$lower_bound = new_nuc$lower_bound
384
      }
385
      #   add nucleosome to current cluster
386
      nuc_from_track[current_track] = TRUE
387
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
388
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
389
    } else {
390
			if (!is.null(new_cluster)) {
391
        # store old cluster
392
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
393
			}
394
      # Reinit current cluster composition stuff
395
      nb_nucs_in_cluster = 0
396
      nuc_from_track = c()
397
      for (i in 1:length(tf_outs)){
398
        nuc_from_track[i] = FALSE
399
      }
400
      # create new cluster
401
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
402
      # update upper bound
403
      current_upper_bound = new_upper_bound
404
      # add nucleosome to current cluster
405
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
406
      nuc_from_track[current_track] = TRUE
407
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
408

    
409
		}
410

    
411
		current_nuc = new_nuc
412

    
413
    # update indexes
414
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
415
      indexes[current_track] = indexes[current_track] + 1
416
      # update track
417
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
418
    } else {
419
      # update track
420
      track_readers[current_track] = coord_max
421
    }
422
  }
423
  # store last cluster
424
  if (!is.null(new_cluster)) {
425
    # store old cluster
426
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
427
  }
428
	return(list(clusters, llr_scores))
429
### Returns a list of clusterized nucleosomes, and all computed llr scores.
430
}, ex=function(){
431
	# Dealing with a region of interest
432
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
433
	samples = list()
434
	for (i in 1:3) {
435
		# Create TF output
436
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
437
		outputs = dfadd(NULL,tf_nuc)
438
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
439
		# Generate corresponding reads
440
		nb_reads = round(runif(1,170,230))
441
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
442
		u_reads = sort(unique(reads))
443
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
444
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
445
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
446
		u_reads = u_reads + shifts
447
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
448
		                         "V2" = u_reads,
449
														 "V3" = strands,
450
														 "V4" = counts), stringsAsFactors=FALSE)
451
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
452
	}
453
	print(aggregate_intra_strain_nucs(samples))
454
})
455

    
456
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
457
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
458
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
459
cur_index ##<< the index of the roi involved
460
) {
461
	if  (length(partial_strain_maps) == 0 ){
462
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
463
    tmp_strain_maps = list()
464
	} else {
465
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
466
			tmp_nuc = partial_strain_maps[[i]]
467
			tmp_nuc_as_list = list()
468
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
469
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
470
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
471
			tmp_nuc_as_list[["cur_index"]] = cur_index
472
			tmp_nuc_as_list[["index_nuc"]] = i
473
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
474
			all_original_reads = c()
475
			for (j in 1:length(tmp_nuc$nucs)) {
476
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
477
			}
478
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
479
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
480
			if (tmp_nuc$wp) {
481
				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
482
				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
483
			} else {
484
				tmp_nuc_as_list[["llr_1"]] = NA
485
				tmp_nuc_as_list[["llr_2"]] = NA
486
			}
487
      return(tmp_nuc_as_list)
488
    })
489
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
490
	}
491
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
492
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
493
}
494

    
495
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
496
### This function aligns nucs between two strains for a given genome region.
497
replicates, ##<< Set of replicates, ideally 3 per strain.
498
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
499
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
500
corr_thres=0.5, ##<< Correlation threshold.
501
llr_thres=100, ##<< LOD cut off.
502
config=NULL, ##<< GLOBAL config variable
503
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
504
) {
505
	if (length(replicates) < 2) {
506
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
507
	} else if (length(replicates) > 2) {
508
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
509
	}
510
	common_nuc = NULL
511
	llr_scores = c()
512
	chr = replicates[[1]][[1]]$roi$chr
513
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
514
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
515
	strain_ref1 = replicates[[1]][[1]]$strain
516
	strain_ref2 = replicates[[2]][[1]]$strain
517
	big_cur = replicates[[1]][[1]]$roi
518
  orig_big_cur = replicates[[1]][[1]]$orig_roi
519
	if(big_cur$end - big_cur$begin < 0) {
520
		tmp_begin = big_cur$begin
521
		big_cur$begin =  big_cur$end
522
		big_cur$end =  tmp_begin
523
	}
524
	# GO!
525
	if (is.null(wp_nucs_strain_ref1)) {
526
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
527
	}
528
	if (is.null(wp_nucs_strain_ref2)) {
529
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
530
  }
531
	lws = c()
532
	ups = c()
533
	for (na in wp_nucs_strain_ref2) {
534
		lws = c(lws, na$lower_bound)
535
		ups = c(ups, na$upper_bound)
536
	}
537

    
538
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
539
	roi_strain_ref1 = NULL
540
	roi_strain_ref2 = NULL
541
	if (length(wp_nucs_strain_ref1) > 0) {
542
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
543
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
544
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
545
			# Filtering on Well Positionned
546
			if (nuc_strain_ref1$wp) {
547
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
548
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
549
        if (!is.null(roi_strain_ref2)){
550
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
551
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
552
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
553
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
554
					for (index_nuc_strain_ref2 in matching_nas) {
555
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
556
						# Filtering on Well Positionned
557
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
558
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
559
                nuc_strain_ref2$wp) {
560
							# Filtering on correlation Score and collecting reads
561
							SKIP = FALSE
562
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
563
							reads_strain_ref1 = c()
564
							for (nuc in nuc_strain_ref1$nucs){
565
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
566
								if (nuc$corr < corr_thres) {
567
									SKIP = TRUE
568
								}
569
							}
570
							reads_strain_ref2 = c()
571
							for (nuc in nuc_strain_ref2$nucs){
572
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
573
								if (nuc$corr < corr_thres) {
574
									SKIP = TRUE
575
								}
576
							}
577
							# Filtering on correlation Score
578
							if (!SKIP) {
579
								# tranlation of reads into strain 2 coords
580
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
581
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
582
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
583
								llr_scores = c(llr_scores, llr_score)
584
								# Filtering on LOD Score
585
								if (llr_score < llr_thres) {
586
									tmp_nuc = list()
587
									# strain_ref1
588
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
589
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
590
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
591
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
592
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
593
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
594
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
595
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
596
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
597
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
598
									# strain_ref2
599
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
600
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
601
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
602
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
603
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
604
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
605
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
606
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
607
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
608
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
609
									# common
610
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
611
									# print(tmp_nuc)
612
									common_nuc = dfadd(common_nuc, tmp_nuc)
613
								}
614
							}
615
						}
616
					}
617
				} else {
618
		      print("WARNING! No roi for strain ref 2.")
619
			  }
620
		  }
621
		}
622

    
623
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
624
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
625
			to_remove_list = c()
626
			for (i in 1:length(index_redundant)) {
627
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
628
				  to_remove = index_redundant[i]
629
				}	 else {
630
					to_remove = index_redundant[i] + 1
631
			  }
632
				to_remove_list = c(to_remove_list, to_remove)
633
			}
634
			common_nuc = common_nuc[-to_remove_list,]
635
		}
636

    
637
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
638
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
639
			to_remove_list = c()
640
			for (i in 1:length(index_redundant)) {
641
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
642
				  to_remove = index_redundant[i]
643
				}	 else {
644
					to_remove = index_redundant[i] + 1
645
			  }
646
				to_remove_list = c(to_remove_list, to_remove)
647
			}
648
			common_nuc = common_nuc[-to_remove_list,]
649
		}
650

    
651
		return(list(common_nuc, llr_scores))
652
	} else {
653
		print("WARNING, no nucs for strain_ref1.")
654
		return(NULL)
655
	}
656
### Returns a list of clusterized nucleosomes, and all computed llr scores.
657
}, ex=function(){
658

    
659
    # Define new translate_cur function...
660
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
661
      return(roi)
662
    }
663
    # Binding it by uncomment follwing lines.
664
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
665
    unlockBinding("translate_cur", getNamespace("nucleominer"))
666
    assign("translate_cur", translate_cur, "package:nucleominer")
667
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
668
    lockBinding("translate_cur", getNamespace("nucleominer"))
669
    lockBinding("translate_cur", as.environment("package:nucleominer"))
670

    
671
	# Dealing with a region of interest
672
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
673
	roi2 = translate_cur(roi, roi$strain_ref1)
674
	replicates = list()
675
	for (j in 1:2) {
676
		samples = list()
677
		for (i in 1:3) {
678
			# Create TF output
679
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
680
			outputs = dfadd(NULL,tf_nuc)
681
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
682
			# Generate corresponding reads
683
			nb_reads = round(runif(1,170,230))
684
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
685
			u_reads = sort(unique(reads))
686
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
687
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
688
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
689
			u_reads = u_reads + shifts
690
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
691
			                         "V2" = u_reads,
692
															 "V3" = strands,
693
															 "V4" = counts), stringsAsFactors=FALSE)
694
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
695
		}
696
		replicates[[length(replicates) + 1]] = samples
697
	}
698
	print(align_inter_strain_nucs(replicates))
699
})
700

    
701

    
702

    
703

    
704

    
705

    
706

    
707

    
708

    
709

    
710

    
711

    
712

    
713

    
714

    
715

    
716

    
717

    
718

    
719

    
720

    
721

    
722

    
723

    
724

    
725

    
726
fetch_mnase_replicates = function(# Prefetch data
727
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
728
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
729
roi, ##<< Region of interest.
730
all_samples, ##<< Global list of samples.
731
config=NULL, ##<< GLOBAL config variable
732
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
733
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
734
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
735
) {
736
	samples=list()
737
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
738
	for (i in samples_ids) {
739
		sample = as.list(all_samples[all_samples$id==i,])
740
    sample$orig_roi = roi
741
    sample$roi = translate_cur(roi, sample$strain, config = config)
742
		if (get_genome) {
743
			# Get Genome
744
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
745
		}
746
		# Get inputs
747
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
748
		sample$total_reads = sum(sample$inputs[,4])
749
		if (!only_fetch) {
750
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
751
	  }
752
	  # Get TF outputs for Mnase_Seq samples
753
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
754
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
755
			if (!only_fetch) {
756
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
757
  		}
758
		}
759
		samples[[length(samples) + 1]] = sample
760
	}
761
  return(samples)
762
}
763

    
764
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
765
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
766
region1, ##<< Original regions.
767
region2 ##<< Regions to substract.
768
) {
769
  rec_substract_region = function(region1, region2) {
770
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
771
    cur_fuzzy = region1[i,]
772
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
773
    if (length(inter_wp[,1]) > 0) {
774
      ret = c()
775
      for (j in 1:length(inter_wp[,1])) {
776
        cur_wp = inter_wp[j,]
777
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
778
          # remove cur_fuzzy
779
          ret = c()
780
          break
781
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
782
          # crop fuzzy
783
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
784
          ret = cur_fuzzy
785
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
786
          # crop fuzzy
787
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
788
          ret = cur_fuzzy
789
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
790
          # split fuzzy
791
          tmp_ret_fuzzy_1 = cur_fuzzy
792
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
793
          tmp_ret_fuzzy_2 = cur_fuzzy
794
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
795
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
796
          # print(ret)
797
          # ret = cur_fuzzy
798
          break
799
        } else {
800
          stop("WARNING NO ADAPTED CASE!")
801
        }
802
      }
803
      return(ret)
804
    } else {
805
      return(cur_fuzzy)
806
    }
807
  })
808
  }
809
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
810
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
811
  tmp_ulist = unlist(non_inter_fuzzy)
812
  tmp_names = names(tmp_ulist)[1:4]
813
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
814
  names(non_inter_fuzzy) = tmp_names
815
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
816
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
817
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
818
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
819
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
820
  return(non_inter_fuzzy)
821
}
822

    
823
union_regions = function(# Aggregate regions that intersect themnselves.
824
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
825
regions ##<< The Regions to be aggregated
826
) {
827
  if (is.null(regions)) {return(regions)}
828
  if (nrow(regions) == 0) {return(regions)}
829
  old_length = length(regions[,1])
830
  new_length = 0
831

    
832
  while (old_length != new_length) {
833
    regions = regions[order(regions$lower_bound), ]
834
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
835
    vec_end_1 = which(regions$stop)
836
    if (length(vec_end_1) == 0) {
837
      vec_end_1 = c(length(regions$stop))
838
    }
839
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
840
      vec_end_1 = c(vec_end_1, length(regions$stop))
841
    }
842
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
843
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
844
      chr = regions$chr[vec_beg_1[i]]
845
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
846
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
847
      cur_index = regions$cur_index[vec_beg_1[i]]
848
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
849
      })
850
    union = collapse_regions(union)
851
    old_length = length(regions[,1])
852
    new_length = length(union[,1])
853
    regions = union
854
  }
855
  return(union)
856
}
857

    
858
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
859
# ### It is based on common wp nucs index on nucs and region.
860
# strain_maps, ##<< Nuc maps.
861
# cur_index, ##<< The region of interest index.
862
# tmp_common_nucs, ##<< the list of wp nucs.
863
# strain##<< The strain to consider.
864
# ){
865
#   fuzzy_nucs = strain_maps[[strain]]
866
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
867
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
868
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
869
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
870
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
871
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
872
#   return(fuzzy_nucs)
873
# }
874

    
875
translate_regions = function(# Translate a list of regions from a strain ref to another.
876
### This function is an eloborated call to translate_cur.
877
regions, ##<< Regions to be translated.
878
combi, ##<< Combination of strains.
879
cur_index, ##<< The region of interest index.
880
config=NULL, ##<< GLOBAL config variable
881
roi ##<< The region of interest.
882
) {
883
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
884
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
885
    big_cur =  roi
886
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
887
    if (is.null(trs_tmp_regions_ref2)) {
888
      return(NULL)
889
    } else {
890
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
891
    }
892
  })
893

    
894
  return(collapse_regions(tr_regions))
895
}
896

    
897
collapse_regions = function(# reformat an "apply  manipulated" list of regions
898
### Utils to reformat an "apply  manipulated" list of regions
899
regions ##< a list of regions
900
) {
901
  if (is.null(regions)) {
902
    return(NULL)
903
  } else {
904
    regions = do.call(rbind, regions)
905
    regions$chr = as.character(regions$chr)
906
    regions$chr = as.numeric(regions$chr)
907
    regions$lower_bound = as.numeric(regions$lower_bound)
908
    regions$upper_bound = as.numeric(regions$upper_bound)
909
    regions = regions[order(regions$lower_bound),]
910
    return(regions)
911
  }
912
}
913

    
914

    
915
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
916
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
917
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
918
roi, ##<< The region of interest.
919
strain, ##<< The strain to consider.
920
config=NULL ##<< GLOBAL config variable
921
) {
922
  tr_roi = translate_cur(roi, strain, config = config)
923
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
924
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
925
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
926
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
927
  }
928
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
929
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
930
  }
931
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
932
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
933
  }
934
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
935
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
936
  }
937
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
938
  return(tmp_fuzzy_nucs)
939
}
940

    
941
get_all_reads = function(# Retrieve Reads
942
### Retrieve reads for a given marker, combi, form.
943
marker, ##<< The marker to considere.
944
combi, ##<< The starin combination to considere.
945
form="wp", ##<< The nuc form to considere.
946
config=NULL ##<< GLOBAL config variable
947
) {
948
	all_reads = NULL
949
  for (manip in c("Mnase_Seq", marker)) {
950
    if (form == "unr") {
951
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
952
  		tmp_res = read.table(file=out_filename, header=TRUE)
953
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
954
      tmp_res$form = form
955
    } else if (form == "wp") {
956
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
957
  		tmp_res = read.table(file=out_filename, header=TRUE)
958
      tmp_res$form = form
959
    } else if (form == "wpunr") {
960
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
961
  		tmp_res = read.table(file=out_filename, header=TRUE)
962
      tmp_res$form = "wp"
963
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
964
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
965
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
966
      tmp_res2$form = "unr"
967
      tmp_res = rbind(tmp_res, tmp_res2)
968
    }
969
		if (is.null(all_reads)) {
970
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
971
		}
972
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
973
		all_reads = cbind(all_reads, tmp_res)
974
  }
975
  return(all_reads)
976
}
977

    
978
get_design = function(# Build the design for deseq
979
### This function build the design according sample properties.
980
marker, ##<< The marker to considere.
981
combi, ##<< The starin combination to considere.
982
all_samples ##<< Global list of samples.
983
) {
984
  off1 = 0
985
  off2 = 0
986
	manips = c("Mnase_Seq", marker)
987
	design_rownames = c()
988
	design_manip = c()
989
	design_strain = c()
990
  off2index = function(off) {
991
  	switch(toString(off),
992
  		"1"=c(0,1,1),
993
  	  "2"=c(1,0,1),
994
    	"3"=c(1,1,0),
995
  		c(1,1,1)
996
  		)
997
  }
998
	for (manip in manips) {
999
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
1000
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
1001
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
1002
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
1003
		} else {
1004
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1005
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1006
			}
1007
		}
1008
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
1009
		for (strain in combi) {
1010
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1011
			design_strain = c(design_strain, rep(strain, length(cols)))
1012
			design_rownames = c(design_rownames, cols)
1013
		}
1014
	}
1015
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1016
	return(snep_design)
1017
}
1018

    
1019
plot_dist_samples = function(# Plot the distribution of reads.
1020
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1021
strain, ##<< The strain to considere.
1022
marker, ##<< The marker to considere.
1023
res, ##<< Data
1024
all_samples, ##<< Global list of samples.
1025
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1026
) {
1027
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1028
	snepCountTable = res[,cols]
1029
	snepDesign = data.frame(
1030
		row.names = cols,
1031
		manip = rep(marker, length(cols)),
1032
		strain = rep(strain, length(cols))
1033
		)
1034
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1035
	sizeFactors = estimateSizeFactors(cdsFull)
1036
	# print(sizeFactors[[1]])
1037
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1038
	if (NEWPLOT) {
1039
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1040
		NEWPLOT = FALSE
1041
	}
1042
	for (it in 1:length(sample_ids)) {
1043
		sample_id = sample_ids[it]
1044
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1045
	}
1046
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1047
}
1048

    
1049
analyse_design = function(# Launch deseq methods.
1050
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1051
snep_design, ##<< The design to considere.
1052
reads ##<< The data to considere.
1053
) {
1054
	snep_count_table = reads[, rownames(snep_design)]
1055
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1056
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1057
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1058

    
1059
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1060
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1061

    
1062
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1063
	return(list(fit1, fit0, snep_design, pvalsGLM))
1064
}
1065

    
1066

    
1067

    
1068

    
1069

    
1070

    
1071

    
1072

    
1073
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1074
### This function uses
1075
marker, ##<< The marker involved.
1076
combi, ##<< The strain combination involved.
1077
form, ##<< the nuc form involved.
1078
all_samples, ##<< Global list of samples.
1079
config=NULL ##<< GLOBAL config variable
1080
) {
1081
  # PRETREAT
1082
  snep_design = get_design(marker, combi, all_samples)
1083
  reads = get_all_reads(marker, combi, form, config=config)
1084
  # RUN ANALYSE
1085
  tmp_analyse = analyse_design(snep_design, reads)
1086
  # RESULTS
1087
	fit1 = tmp_analyse[[1]]
1088
	fit0 = tmp_analyse[[2]]
1089
  k = names(fit1)
1090
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1091
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1092
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1093
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1094
	snep_design = tmp_analyse[[3]]
1095
  # print(snep_design)
1096
	fdr = 0.0001
1097
	thres = FDR(reads$pvalsGLM, fdr)
1098
	reads$snep_index = reads$pvalsGLM < thres
1099
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1100
  return(reads)
1101
  },  ex=function(){
1102
    marker = "H3K4me1"
1103
    combi = c("BY", "YJM")
1104
    form = "wpunr" # "wp" | "unr" | "wpunr"
1105
    # foo = get_sneps(marker, combi, form)
1106
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1107
})
1108

    
1109

    
1110

    
1111

    
1112

    
1113

    
1114

    
1115

    
1116

    
1117

    
1118

    
1119

    
1120

    
1121

    
1122

    
1123

    
1124

    
1125

    
1126

    
1127

    
1128

    
1129

    
1130

    
1131

    
1132
ROM2ARAB = function(# Roman to Arabic pair list.
1133
### Util to convert Roman to Arabic
1134
){list(
1135
  "I" = 1,
1136
  "II" = 2,
1137
  "III" = 3,
1138
  "IV" = 4,
1139
  "V" = 5,
1140
  "VI" = 6,
1141
  "VII" = 7,
1142
  "VIII" = 8,
1143
  "IX" = 9,
1144
  "X" = 10,
1145
  "XI" = 11,
1146
  "XII" = 12,
1147
  "XIII" = 13,
1148
  "XIV" = 14,
1149
  "XV" = 15,
1150
  "XVI" = 16,
1151
  "XVII" = 17,
1152
  "XVIII" = 18,
1153
  "XIX" = 19,
1154
  "XX" = 20
1155
)}
1156

    
1157
switch_pairlist = structure(function(# Switch a pairlist
1158
### Take a pairlist key:value and return the switched pairlist value:key.
1159
l ##<< The pairlist to switch.
1160
) {
1161
	ret = list()
1162
	for (name in names(l)) {
1163
		ret[[as.character(l[[name]])]] = name
1164
	}
1165
	ret
1166
### The switched pairlist.
1167
}, ex=function(){
1168
	l = list(key1 = "value1", key2 = "value2")
1169
	print(switch_pairlist(l))
1170
})
1171

    
1172
ARAB2ROM = function(# Arabic to Roman pair list.
1173
### Util to convert Arabicto Roman
1174
){switch_pairlist(ROM2ARAB())}
1175

    
1176

    
1177

    
1178

    
1179
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1180
### This fonction allow to acces to a specific part of the c2c file.
1181
strain1, ##<< the key strain
1182
strain2, ##<< the target strain
1183
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1184
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1185
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1186
config=NULL##<<  GLOBAL config variable
1187
) {
1188
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1189
	# Launch c2c file
1190
	if (reverse) {
1191
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1192
	} else {
1193
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1194
	}
1195
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1196
  # Filtering unagapped
1197
  c2c = c2c[c2c$V6=="-",]
1198
	# Reverse
1199
	if (reverse) {
1200
		tmp_col = c2c$V1
1201
		c2c$V1 = c2c$V7
1202
		c2c$V7 = tmp_col
1203
		tmp_col = c2c$V2
1204
		c2c$V2 = c2c$V9
1205
		c2c$V9 = tmp_col
1206
		tmp_col = c2c$V3
1207
		c2c$V3 = c2c$V10
1208
		c2c$V10 = tmp_col
1209
	}
1210
  if (!is.null(chr)) {
1211
  	if (strain1 == "BY") {
1212
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1213
  	} else if (strain1 == "RM") {
1214
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1215
  	} else if (strain1 == "YJM") {
1216
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1217
  	}
1218
  	c2c = c2c[c2c$V1 == chro_1,]
1219
    if (!is.null(lower_bound)) {
1220
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1221
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1222
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1223
    }
1224
    if (!is.null(upper_bound)) {
1225
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1226
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1227
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1228
    }
1229
  }
1230
  return(c2c)
1231
# It retruns the appropriate c2c file part.
1232
}
1233

    
1234
translate_cur = structure(function(# Translate coords of a genome region.
1235
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1236
roi, ##<< Original genome region of interest.
1237
strain2, ##<< The strain in wich you want the genome region of interest.
1238
config=NULL, ##<< GLOBAL config variable
1239
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1240
) {
1241
	strain1 = roi$strain_ref
1242
	if (strain1 == strain2) {
1243
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1244
		return(roi)
1245
	}
1246

    
1247
	# Extract c2c file
1248
	if (!is.null(big_cur)) {
1249
  	# Dealing with big_cur
1250
		if (roi$strain_ref != big_cur$strain_ref) {
1251
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1252
    }
1253
    if (big_cur$end < big_cur$begin) {
1254
      tmp_var = big_cur$begin
1255
      big_cur$begin = big_cur$end
1256
      big_cur$end = tmp_var
1257
      big_cur$length = big_cur$end - big_cur$begin + 1
1258
    }
1259
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1260
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1261
      return(NULL)
1262
    }
1263
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1264
  } else {
1265
    # No big_cur
1266
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1267
  }
1268
  
1269
  #	Convert initial roi$chr into c2c format
1270
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1271
	begin_1 = roi$begin
1272
  end_1 = roi$end
1273
  if (reverse) {
1274
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1275
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1276
	} else {
1277
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1278
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1279
	}
1280

    
1281
	# Never happend conditions ...
1282
	{
1283
		if (length(tmptransfostart$V8) == 0) {
1284
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1285
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1286
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1287
  			begin_1 = min(tmp_c2c$V2)
1288
      } else {
1289
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1290
  			begin_1 = min(tmp_c2c$V3)
1291
      }
1292
			if (reverse) {
1293
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1294
			} else {
1295
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1296
			}
1297
			if (length(tmptransfostart$V8) == 0) {
1298
				if (!is.null(big_cur)) {
1299
					return(NULL)
1300
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1301
				} else {
1302
					print(tmptransfostart)
1303
					print(tmptransfostop)
1304
					stop("Never happend condition 1.")
1305
				}
1306
			}
1307
		}
1308
		if (length(tmptransfostop$V8) == 0) {
1309
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1310
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1311
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1312
  			end_1 = max(tmp_c2c$V3)
1313
      } else {
1314
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1315
  			end_1 = max(tmp_c2c$V2)
1316
      }
1317
			if (reverse) {
1318
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1319
			} else {
1320
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1321
			}
1322
			if (length(tmptransfostop$V8) == 0) {
1323
				if (!is.null(big_cur)) {
1324
					return(NULL)
1325
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1326
				} else {
1327
					print(tmptransfostart)
1328
					print(tmptransfostop)
1329
					stop("Never happend condition 2.")
1330
				}
1331
			}
1332
		}
1333
		if (length(tmptransfostart$V8) != 1) {
1334
			# print("many start")
1335
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1336
			if (length(tmptransfostart$V8) != 1) {
1337
				print(tmptransfostart)
1338
				print(tmptransfostop)
1339
  			stop("Never happend condition 3.")
1340
			}
1341
		}
1342
		if (length(tmptransfostop$V8) != 1) {
1343
			# print("many stop")
1344
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1345
			if (length(tmptransfostop$V8) != 1) {
1346
				print(tmptransfostart)
1347
				print(tmptransfostop)
1348
  			stop("Never happend condition 4.")
1349
			}
1350
		}
1351
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1352
			print(tmptransfostart)
1353
			print(tmptransfostop)
1354
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1355
		}
1356
	}
1357
  
1358
  # Deal with strand
1359
  if (tmptransfostart$V8 == 1) {
1360
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1361
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1362
  } else {
1363
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1364
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1365
  }
1366
	# Build returned roi
1367
	roi$strain_ref = strain2
1368
	if (roi$strain_ref == "BY") {
1369
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1370
	} else {
1371
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1372
	}
1373
  roi$begin = begin_2
1374
  roi$end = end_2
1375
	if (sign(roi$end - roi$begin) == 0) {
1376
		roi$length = 1
1377
	} else {
1378
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1379
	}
1380
  return(roi)
1381
}, ex=function(){
1382
	# Define new translate_cur function...
1383
	translate_cur = function(roi, strain2, config) {
1384
		strain1 = roi$strain_ref
1385
		if (strain1 == strain2) {
1386
			return(roi)
1387
		} else {
1388
		  stop("Here is my new translate_cur function...")
1389
		}
1390
	}
1391
	# Binding it by uncomment follwing lines.
1392
	# unlockBinding("translate_cur", as.environment("package:nm"))
1393
	# unlockBinding("translate_cur", getNamespace("nm"))
1394
	# assign("translate_cur", translate_cur, "package:nm")
1395
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1396
	# lockBinding("translate_cur", getNamespace("nm"))
1397
	# lockBinding("translate_cur", as.environment("package:nm"))
1398
})
1399

    
1400

    
1401
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1402
### CURs are regions that can be aligned between the genomes
1403
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1404
min_cur_width = 4000, ##<< The minimum width of a CUR
1405
config = NULL ##<< GLOBAL config variable
1406
) {
1407

    
1408
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1409
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1410
    check_homogeneity(c2c)
1411
  	if (length(c2c[,1]) == 0 ) {
1412
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1413
    } else {
1414
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1415
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1416
  		tmp_index = order(lower_bounds)
1417
      lower_bounds = lower_bounds[tmp_index]
1418
      upper_bounds = upper_bounds[tmp_index]
1419
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1420
      ov_index = which(tmp_diff < 0)
1421
      if(length(ov_index < 0) !=0 ) {
1422
        ov_index = ov_index[1]
1423
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1424
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1425
        print(c2c_corrupted)
1426
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1427
      }
1428
      return(NULL)
1429
    }
1430
  }
1431

    
1432
  check_homogeneity = function(sub_c2c) {
1433
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1434
    tmp_signs = tmp_signs[tmp_signs != 0]
1435
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1436
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1437
      print(tmp_signs)
1438
  	}
1439
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1440
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1441
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1442
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1443
      print(tmp_signs2)
1444
  	}
1445
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1446
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1447
  	}
1448
  }
1449

    
1450
  test_and_squeeze_rois = function(foo, config=NULL) {
1451
    is_it_ok = function(list1, list2) {
1452
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1453
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1454
      if (!ok) {
1455
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1456
      }
1457
      return (ok)
1458
    }
1459
    squeeze_rois = function(list1, list2) {
1460
      rois = apply(t(1:nrow(list1)), 2, function(i){
1461
        roi = list1[i,]
1462
        roi2 = list2[i,]
1463
        roi$begin = max(roi$begin, roi2$begin)
1464
        roi$end = min(roi$end, roi2$end)
1465
        roi$length =  roi$end - roi$begin + 1
1466
        return(roi)
1467
      })
1468
      return(do.call(rbind, rois))
1469
    }
1470
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1471
    # foo = foo_orig
1472
    STOP = FALSE
1473
    nb_round = 0
1474
    while(!STOP) {
1475
      nb_round = nb_round + 1
1476
      print(paste("2-2 round #", nb_round, sep=""))
1477
      fooby = translate_curs(foo, "BY", config=config)
1478
      fooyjm = translate_curs(foo, "YJM", config=config)
1479
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1480
      if (!is_it_ok(fooby, fooyjmby)) {
1481
        print("case 1")
1482
        foo = squeeze_rois(fooby, fooyjmby)
1483
    		next
1484
      }
1485
      foorm = translate_curs(foo, "RM", config=config)
1486
      foormby = translate_curs(foorm, "BY", config=config)
1487
      if (!is_it_ok(fooby, foormby)) {
1488
        print("case 2")
1489
        foo = squeeze_rois(fooby, foormby)
1490
    		next
1491
      }
1492
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1493
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1494
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1495
        print("case 3")
1496
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1497
        next
1498
      }
1499
      foormyjm = translate_curs(foorm, "YJM", config=config)
1500
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1501
      if (!is_it_ok(foorm, foormyjmrm)) {
1502
        print("case 4")
1503
        foo = squeeze_rois(foorm, foormyjmrm)
1504
        next
1505
      }
1506
      foo = translate_curs(foo, "BY", config=config)
1507
      STOP = TRUE
1508
    }
1509
    STOP = FALSE
1510
    nb_round = 0
1511
    while(!STOP) {
1512
      nb_round = nb_round + 1
1513
      print(paste("3-3 round #", nb_round, sep=""))
1514
      fooby = translate_curs(foo, "BY", config=config)
1515
      foobyrm = translate_curs(fooby, "RM", config=config)
1516
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1517
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1518
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1519
        print("case 1")
1520
        foo = squeeze_rois(fooby, foobyrmyjmby)
1521
      }
1522
      fooby = translate_curs(foo, "BY", config=config)
1523
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1524
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1525
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1526
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1527
        print("case 2")
1528
        foo = squeeze_rois(fooby, foobyyjmrmby)
1529
        next
1530
      }
1531
      foo = translate_curs(foo, "BY", config=config)
1532
      STOP = TRUE
1533
    }
1534
    print("end")
1535
    return(foo)
1536
  }
1537

    
1538
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1539
    c2c = c2c_extraction(strain1, strain2, config=config)
1540
    # computing diffs
1541
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1542
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1543
    # Filtering
1544
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1545
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1546
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1547
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1548
      returned_rois = NULL
1549
  		start = indexes_start[i]
1550
  		stop = indexes_stop[i]
1551
  		sub_c2c = c2c[start:stop,]
1552
      check_homogeneity(sub_c2c)
1553
  		if (strain1 == "BY") {
1554
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1555
  		} else {
1556
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1557
  		}
1558
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1559
  		roi$length = roi$end - roi$begin + 1
1560
  		if (roi$length >= min_cur_width) {
1561
  			lower_bound = roi$begin
1562
  			upper_bound = roi$end
1563
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1564
        while(!is.null(check)) {
1565
          # print(check)
1566
      		roi1 = roi
1567
          roi1$end = check[[1]]
1568
      		roi1$length = roi1$end - roi1$begin + 1
1569
      		if (roi1$length >= min_cur_width) {
1570
            returned_rois = dfadd(returned_rois,roi1)
1571
          }
1572
          roi$begin = check[[2]]
1573
      		roi$length = roi$end - roi$begin + 1
1574
      		if (roi$length >= min_cur_width) {
1575
      			lower_bound = min(roi$begin, roi$end)
1576
      			upper_bound = max(roi$begin, roi$end)
1577
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1578
          } else {
1579
            check = NULL
1580
            roi = NULL
1581
          }
1582
        }
1583
        returned_rois = dfadd(returned_rois,roi)
1584
  	  }
1585
    })
1586
    rois = do.call(rbind,rois)
1587
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1588
  	return(rois)
1589
  }
1590

    
1591
  translate_curs = function(rois, target_strain, config) {
1592
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1593
      roi = rois[i,]
1594
      tr_roi = translate_cur(roi, target_strain, config=config)  
1595
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1596
      tmp_end = max(tr_roi$begin, tr_roi$end)
1597
      tr_roi$begin = tmp_begin
1598
      tr_roi$end = tmp_end
1599
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1600
      return(tr_roi)
1601
    })
1602
    tr_rois = do.call(rbind, tr_rois)
1603
    return(tr_rois)
1604
  }
1605

    
1606
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1607
  rois = list()
1608
  for (combi in combis) {
1609
    strain1 = combi[1]
1610
    strain2 = combi[2]
1611
    print(paste(strain1, strain2))
1612
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1613
    strain1 = combi[2]
1614
    strain2 = combi[1]
1615
    print(paste(strain1, strain2))
1616
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1617
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1618
    region1 = rois_fwd
1619
    region2 = tr_rois_rev
1620
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1621
  }
1622
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1623
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1624
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1625
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1626
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1627
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1628
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1629
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1630
  return (squeezed_rois)
1631
}
1632

    
1633
intersect_region = function(# Returns the intersection of 2 list on regions.
1634
### This function...
1635
region1, ##<< Original regions.
1636
region2 ##<< Regions to intersect.
1637
) {
1638
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1639
    roi1 = region1[i, ]
1640
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1641
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1642
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1643
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1644
                                , ]
1645
    if (nrow(sub_regions2) == 0) {
1646
      print("removing a region")
1647
      return(NULL)
1648
    } else if (nrow(sub_regions2) > 1) {
1649
      print("more than one region in intersect_region")          
1650
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1651
    } else {
1652
      roi2 = sub_regions2[1,]
1653
      if (roi1$begin < roi2$begin) {
1654
        print("not the same begin")
1655
        roi1$begin = roi2$begin
1656
        roi1$length =  roi1$end - roi1$begin + 1
1657
      }
1658
      if (roi1$end > roi2$end) {
1659
        print("not the same end")
1660
        roi1$end = roi2$end
1661
        roi1$length =  roi1$end - roi1$begin + 1
1662
      }
1663
      return(roi1)
1664
    }         
1665
  })
1666
  return(do.call(rbind,intersection))
1667
}
1668

    
1669
build_replicates = structure(function(# Stage replicates data
1670
### This function loads in memory data corresponding to the given experiments.
1671
expe, ##<< a list of vector corresponding to vector of replicates.
1672
roi, ##<< the region that we are interested in.
1673
only_fetch=FALSE, ##<< filter or not inputs.
1674
get_genome=FALSE,##<< Load or not corresponding genome.
1675
all_samples, ##<< Global list of samples.
1676
config=NULL ##<< GLOBAL config variable.
1677
) {
1678
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1679
  	samples=list()
1680
  	for (i in samples_ids) {
1681
  		sample = as.list(all_samples[all_samples$id==i,])
1682
      sample$orig_roi = roi
1683
      sample$roi = translate_cur(roi, sample$strain, config = config)
1684
  		if (get_genome) {
1685
  			# Get Genome
1686
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1687
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1688
  		}
1689
  		# Get inputs
1690
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1691
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1692
  		sample$total_reads = sum(sample$inputs[,4])
1693
  		if (!only_fetch) {
1694
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1695
  	  }
1696
  	  # Get TF outputs for Mnase_Seq samples
1697
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1698
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1699
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1700
  			if (!only_fetch) {
1701
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1702
    		}
1703
  		}
1704
  		samples[[length(samples) + 1]] = sample
1705
  	}
1706
  	return(samples)
1707
  }
1708
	replicates = list()
1709
	for(samples_ids in expe) {
1710
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1711
		replicates[[length(replicates) + 1]] = samples
1712
	}
1713
	return(replicates)
1714
  }, ex = function() {
1715
    # library(rjson)
1716
    # library(nucleominer)
1717
    #
1718
    # # Read config file
1719
    # json_conf_file = "nucleo_miner_config.json"
1720
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1721
    # # Read sample file
1722
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1723
    # # here are the sample ids in a list
1724
    # expes = list(c(1))
1725
    # # here is the region that we wnt to see the coverage
1726
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1727
    # # it displays the corverage
1728
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1729
    # out = watch_samples(replicates, config$READ_LENGTH,
1730
    #       plot_coverage = TRUE,
1731
    #       plot_squared_reads = FALSE,
1732
    #       plot_ref_genome = FALSE,
1733
    #       plot_arrow_raw_reads = FALSE,
1734
    #       plot_arrow_nuc_reads = FALSE,
1735
    #       plot_gaussian_reads = FALSE,
1736
    #       plot_gaussian_unified_reads = FALSE,
1737
    #       plot_ellipse_nucs = FALSE,
1738
    #       plot_wp_nucs = FALSE,
1739
    #       plot_wp_nuc_model = FALSE,
1740
    #       plot_common_nucs = FALSE,
1741
    #       height = 50)
1742
  })
1743

    
1744

    
1745

    
1746

    
1747

    
1748

    
1749

    
1750

    
1751

    
1752

    
1753

    
1754

    
1755

    
1756

    
1757

    
1758

    
1759

    
1760

    
1761

    
1762

    
1763

    
1764

    
1765

    
1766

    
1767

    
1768

    
1769

    
1770

    
1771

    
1772

    
1773

    
1774

    
1775

    
1776

    
1777

    
1778

    
1779

    
1780

    
1781

    
1782

    
1783

    
1784

    
1785

    
1786

    
1787

    
1788

    
1789

    
1790

    
1791

    
1792

    
1793

    
1794

    
1795

    
1796

    
1797

    
1798

    
1799

    
1800

    
1801

    
1802

    
1803

    
1804

    
1805

    
1806

    
1807

    
1808

    
1809

    
1810

    
1811

    
1812

    
1813

    
1814

    
1815

    
1816

    
1817

    
1818

    
1819

    
1820

    
1821

    
1822

    
1823

    
1824

    
1825

    
1826
watch_samples = function(# Watching analysis of samples
1827
### This function allows to view analysis for a particuler region of the genome.
1828
replicates, ##<< replicates under the form...
1829
read_length, ##<< length of the reads
1830
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1831
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1832
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1833
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1834
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1835
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1836
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1837
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1838
change_col = TRUE, ##<< Change the color of each nucleosome.
1839
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1840
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1841
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1842
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1843
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1844
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1845
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1846
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1847
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1848
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1849
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1850
main=NULL, ##<< main title of the produced plot
1851
xlab=NULL, ##<< xlab of the produced plot
1852
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1853
config=NULL ##<< GLOBAL config variable
1854
){
1855
  returned_list = list()
1856
  # Computing global display parameters
1857
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1858
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1859
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1860
  } else {
1861
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1862
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1863
  }
1864
	base_glo = 0
1865
	nb_rank_glo = 0
1866
  for (samples in replicates) {
1867
  	nb_rank_glo = nb_rank_glo + length(samples)
1868
  }
1869
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1870
	y_min_glo = min(ylim_glo)
1871
	y_max_glo = max(ylim_glo)
1872
  delta_y_glo = y_max_glo - y_min_glo
1873
  # Plot main frame
1874
  if (is.null(xlab)) {
1875
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1876
  }
1877
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1878
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1879
  # Go
1880
	replicates_wp_nucs = list()
1881
  wp_maps = list()
1882
  fuzzy_maps = list()
1883
  for (replicate_rank in 1:length(replicates)) {
1884
		# Computing replicate parameters
1885
		nb_rank = length(samples)
1886
		base = (replicate_rank-1) * height * nb_rank
1887
		ylim = c(base, base + height * nb_rank)
1888
		y_min = min(ylim)
1889
		y_max = max(ylim)
1890
	  delta_y = y_max - y_min
1891
		samples = replicates[[replicate_rank]]
1892
		for (sample_rank in 1:length(samples)) {
1893
			# computing sample parameters
1894
			sample = samples[[sample_rank]]
1895
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1896
      if (plot_sample_id) {
1897
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1898
      }
1899

    
1900
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1901
			  x_min = sample$roi[["begin"]]
1902
			  x_max = sample$roi[["end"]]
1903
		  } else {
1904
			  x_min = - sample$roi[["begin"]]
1905
			  x_max = - sample$roi[["end"]]
1906
		  }
1907
			shift = x_min_glo - x_min
1908
	    # Plot Genome seq
1909
			if (plot_ref_genome) {
1910
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1911
		  }
1912
			# Plot reads
1913
			reads = sample$inputs
1914
			signs = sign_from_strand(reads[,3])
1915
			if (plot_arrow_raw_reads) {
1916
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1917
				col=1, length=0.15/nb_rank)
1918
			}
1919
	    if (plot_squared_reads) {
1920
        # require(plotrix)
1921
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1922
			}
1923
	    if (plot_coverage) {
1924
        if (length(reads[,1]) != 0) {
1925
          step_h = sign(x_min) * signs * reads[,4]
1926
          step_b = sign(x_min) * reads[,2] + shift
1927
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1928
          steps_x = min(step_b, step_e):max(step_b, step_e)
1929
          steps_y = rep(0, length(steps_x))
1930
          for (i in 1:length(step_h)) {
1931
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1932
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1933
          }
1934
          tmp_index = which(steps_y != 0)
1935
          steps_x = steps_x[tmp_index]
1936
          steps_y = steps_y[tmp_index]
1937
          tmp_current_level = 0
1938
          for (i in 1:length(steps_y)) {
1939
            steps_y[i] = tmp_current_level + steps_y[i]
1940
            tmp_current_level = steps_y[i]
1941
          }
1942
          steps_y = c(0, steps_y)
1943
          steps_y = steps_y * 1000000/sample$total_reads
1944
        } else {
1945
          steps_y = c(0, 0, 0)
1946
          steps_x = c(x_min, x_max)
1947
        }
1948
        # print(steps_x)
1949
        # print(steps_y)
1950
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1951
        abline(y_lev,0)
1952
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1953
			}
1954
			# Plot nucs
1955
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1956
				nucs = sample$outputs
1957
				if (length(nucs$center) > 0) {
1958
					col = 1
1959
		      for (i in 1:length(nucs$center)) {
1960
            foo<<-nucs
1961
            if (change_col) {
1962
  						col = col + 1
1963
              } else {
1964
                col = "blue"
1965
              }
1966
		        nuc = nucs[i,]
1967
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1968
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1969
						total_involved_reads = sum(involved_reads[,4])
1970
						if (plot_arrow_nuc_reads ) {
1971
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1972
							col=col, length=0.15/nb_rank)
1973
						}
1974
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1975
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1976
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1977
		  			}
1978
	          if (plot_gaussian_reads ) {
1979
							flatted_reads = flat_reads(involved_reads, nuc$width)
1980
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1981
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1982
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1983
	          }
1984
	          if (plot_gaussian_unified_reads ) {
1985
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1986
	          }
1987
	          if (plot_ellipse_nucs) {
1988
				      # require(plotrix)
1989
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1990
						}
1991
		      }
1992
		    } else {
1993
		      print("WARNING! No nucs to print.")
1994
		    }
1995
			}
1996
	  }
1997

    
1998
	  # Plot wp nucs
1999
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
2000
			if (samples[[1]]$marker == "Mnase_Seq") {
2001
				if (is.null(aggregated_intra_strain_nucs)) {
2002
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
2003
				} else {
2004
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2005
				}
2006
		  } else {
2007
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2008
		  }
2009
      if (plot_chain) {
2010
        tf_nucs = lapply(wp_nucs, function(nuc) {
2011
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2012
            tmp_nuc = tmp_nuc[[1]]
2013
            tmp_nuc$inputs = NULL
2014
            tmp_nuc$original_reads = NULL
2015
            tmp_nuc$wp = nuc$wp
2016
            # print(tmp_nuc)
2017
            return(tmp_nuc)
2018
          })
2019
          return(do.call(rbind, bar))
2020
        })
2021
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2022
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2023
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2024
        tmp_y_prev = tmp_y[-length(tmp_y)]
2025
        tmp_y_next = tmp_y[-1]
2026
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2027
        tmp_track = unlist(tf_nucs$track)
2028
        tmp_track_prev = tmp_track[-length(tmp_track)]
2029
        tmp_track_next = tmp_track[-1]
2030
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2031
        if (is.null(config$TRACK_LOD_OFFSET)) {
2032
          config$TRACK_LOD_OFFSET = 0
2033
        }
2034
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
2035
        tmp_x_prev = tmp_x[-length(tmp_x)]
2036
        tmp_x_next = tmp_x[-1]
2037
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2038
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2039
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
2040
        new_tmp_x = c()
2041
        new_tmp_y = c()
2042
        index_odd = 1:length(tmp_x) * 2 - 1
2043
        index_even = (1:(length(tmp_x) - 1)) * 2
2044
        new_tmp_x[index_odd] = tmp_x
2045
        new_tmp_y[index_odd] = tmp_y
2046
        new_tmp_x[index_even] = tmp_x_inter
2047
        new_tmp_y[index_even] = tmp_y_inter
2048
        lines(new_tmp_x , new_tmp_y, lw=2)
2049
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2050
        points(tmp_x, tmp_y, cex=4, lw=2)
2051
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2052
        if (is.null(config$LEGEND_LOD_POS)) {
2053
          pos = 2
2054
        } else {
2055
          pos = config$LEGEND_LOD_POS
2056
        }
2057
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2058
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
2059
        
2060
      }
2061

    
2062
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2063
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2064
        strain = samples[[1]]$strain
2065
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
2066
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2067

    
2068
        if (plot_fuzzy_nucs) {
2069
          fuzzy_map = fuzzy_maps[[strain]]
2070
          if (!is.null(fuzzy_map)) {
2071
            if (nrow(fuzzy_map) > 0) {
2072
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2073
            }
2074
          }
2075
        } 
2076

    
2077
        if (plot_wp_nucs) {
2078
    			for (wp_nuc in wp_nucs) {
2079
    				if (wp_nuc$wp){
2080
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2081
    					if (plot_wp_nuc_model) {
2082
      					all_original_reads = c()
2083
      					for(initial_nuc in wp_nuc$nucs) {
2084
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2085
      					}
2086
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2087
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2088
    				  }
2089
  				  }
2090
  				}
2091
  			}
2092
      }
2093
		}
2094
	}
2095

    
2096
	if (plot_common_nucs) {
2097
    if (is.null(aligned_inter_strain_nucs)) {
2098
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2099
      if (!is.null(aligned_inter_strain_nucs)) {
2100
        aligned_inter_strain_nucs$cur_index = "foo" 
2101
      }
2102
    }
2103

    
2104
    #Plot common wp nucs
2105
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2106
    for (replicate_rank in 1:length(replicates)) {
2107
      nb_rank[[replicate_rank]] = length(samples)
2108
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2109
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2110
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2111
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2112
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2113
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2114

    
2115
      samples = replicates[[replicate_rank]]
2116
      for (sample_rank in 1:length(samples)) {
2117
        sample = samples[[sample_rank]]
2118
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2119
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2120
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2121
          x_max[[replicate_rank]] = sample$roi[["end"]]
2122
        } else {
2123
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2124
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2125
        }
2126
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2127
      }
2128
    }
2129
    print(aligned_inter_strain_nucs)
2130
    if (!is.null(aligned_inter_strain_nucs)) {
2131
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2132
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2133
        tmp_xs = tmp_ys = c()
2134
        for (replicate_rank in 1:length(replicates)) {
2135
          samples = replicates[[replicate_rank]]
2136
          strain = samples[[1]]$strain
2137
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2138
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2139
        }
2140
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2141
      }
2142
    }
2143
    
2144
    if (plot_common_unrs) {
2145
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2146
      roi = as.list(samples[[1]]$roi)
2147
      cur_index = "foo"
2148
      common_nuc_results = list()
2149
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2150
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2151
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lw=10, col=adjustcolor(4, alpha.f = 0.05))        
2152
    }
2153

    
2154
	}
2155
  return(returned_list)
2156
}
2157

    
2158

    
2159

    
2160
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2161
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2162
wp_map, ##<< Well positionned nucleosomes map.
2163
roi, ##<< The region of interest.
2164
strain, ##<< The strain we want to extracvt the fuzzy map.
2165
config=NULL ##<< GLOBAL config variable.
2166
) {
2167
  fuzzy_map = wp_map[wp_map$wp==0, ]
2168
  if (nrow(fuzzy_map) > 0) {
2169
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2170
    if (!is.null(fuzzy_map)) {
2171
      fuzzy_map = union_regions(fuzzy_map)
2172
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2173
    }
2174
  }
2175
  return(fuzzy_map)
2176
}
2177

    
2178

    
2179

    
2180

    
2181

    
2182
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2183
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2184
combi, ##<< The strain combination to consider.
2185
roi, ##<< The region of interest.
2186
cur_index, ##<< The region of interest index.
2187
wp_maps, ##<< Well positionned nucleosomes maps.
2188
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2189
common_nuc_results, ##<< Common wp nuc maps
2190
config=NULL ##<< GLOBAL config variable
2191
) {
2192
  # print(cur_index)
2193

    
2194
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2195
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2196
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2197

    
2198
  # print(paste("Dealing with unr from", combi[1]))
2199
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2200
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2201
  tmp_wp = wp_maps[[combi[1]]]
2202
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2203
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2204
  # Let's go!
2205
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2206
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2207
      return(NULL)
2208
    } else {
2209
      return (index_nuc)
2210
    }
2211
  }))
2212
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2213
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2214
  if (length(tmp_unr) != 0) {
2215
    tmp_unr = union_regions(tmp_unr)
2216
  }
2217
  tmp_unr_nucs_1 = tmp_unr
2218
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2219
  agg_unr_1 = tmp_unr_nucs_1
2220

    
2221
  # print(paste("Dealing with unr from ", combi[2]))
2222
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2223
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2224
  tmp_wp = wp_maps[[combi[2]]]
2225
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2226
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2227
  # Let's go!
2228
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2229
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2230
      return(NULL)
2231
    } else {
2232
      return (index_nuc)
2233
    }
2234
  }))
2235
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2236
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2237
  if (length(tmp_unr) != 0) {
2238
    tmp_unr = union_regions(tmp_unr)
2239
  }
2240
  tmp_unr_nucs_2 = tmp_unr
2241
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2242
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2243
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2244
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2245

    
2246
  # print("Dealing with unr from both...")
2247
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2248

    
2249
  # print(paste("Dealing with wp from", combi[1]))
2250
  tmp_wp = wp_maps[[combi[1]]]
2251
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2252
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2253
  # Let's go!
2254
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2255
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2256
      return (index_nuc)
2257
    } else {
2258
      return(NULL)
2259
    }
2260
  }))
2261
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2262
  
2263
  # print(paste("Dealing with wp from", combi[2]))
2264
  tmp_wp = wp_maps[[combi[2]]]
2265
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2266
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2267
  # Let's go!
2268
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2269
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2270
      return (index_nuc)
2271
    } else {
2272
      return(NULL)
2273
    }
2274
  }))
2275
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2276
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2277
  if (nrow(wp_nucs_2) == 0) {
2278
    tr_wp_nucs_2 = wp_nucs_2
2279
  } else {
2280
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2281
  }
2282

    
2283
  # print("Dealing with wp from both...")
2284
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2285

    
2286
  # print("Dealing with unr and wp...")
2287
  non_inter_unr = substract_region(all_unr, all_wp)
2288
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2289
  if (is.null(non_inter_unr)) { return(NULL) }
2290
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2291
  min_unr_width = 75
2292
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2293

    
2294
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2295
  return (non_inter_unr)
2296
}
2297

    
2298

    
2299