Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 6e0010bc

Historique | Voir | Annoter | Télécharger (87,08 ko)

1
# get_content = function(# Get content from cached
2
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3
# obj, ##<< The object that we want its content.
4
# ... ##<< Parameters that will be passed to my_read
5
# ) {
6
#   UseMethod("get_content", obj)
7
#   ### Returns the cached content of the current object.
8
# }
9
#
10
# get_content.default = structure( function(# Get content from cached
11
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12
# obj, ##<< The object that we want its content.
13
# ... ##<< Parameters that will be passed to my_read
14
# ) {
15
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16
#     NM_CACHE <<- list()
17
#   }
18
#   if (is.null(NM_CACHE[[obj$filename]])) {
19
#     print(paste("Loading file ",obj$filename, sep=""))
20
#     tmp_content = my_read(obj, ...)
21
#     print("affect it...")
22
#     NM_CACHE[[obj$filename]] <<- tmp_content
23
#     print("done.")
24
#   }
25
#   return(NM_CACHE[[obj$filename]])
26
# ### Returns the cached content of the current object.
27
# }, ex=function(){
28
#   # Create a dataframe
29
#   df = NULL
30
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32
#   # Dump it into tmp file
33
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34
#   # Load it into cache using feature.
35
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36
#   class(cached_content_object) = "table"
37
#   # First time it will be load into cache
38
#   print(get_content(cached_content_object))
39
#   # Second time not
40
#   print(get_content(cached_content_object))
41
#
42
# })
43
#
44
# my_read = function(
45
# ### Abstract my_read function.
46
#   obj, ...) {
47
#   UseMethod("my_read", obj)
48
# }
49
#
50
# my_read.default = function(
51
# ### Default my_read function.
52
#   obj, ...){
53
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
54
# }
55
#
56
# my_read.fasta = function(
57
# ### my_read function for fasta files.
58
#   obj, ...){
59
#   # require(seqinr)
60
#   return(read.fasta(obj$filename, ...))
61
# }
62
#
63
# my_read.table = function(
64
# ### my_read function for table files.
65
#   obj, ...){
66
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67
#     return(read.table(file=gzfile(obj$filename), ...))
68
#   } else {
69
#     return(read.table(file=obj$filename, ...))
70
#   }
71
# }
72
#
73
# my_read.cvs = function(
74
# ### my_read function for cvs files.
75
#   obj, ...){
76
#   return(read.csv(file=obj$filename, ...))
77
# }
78

    
79

    
80

    
81

    
82

    
83

    
84

    
85

    
86

    
87
FDR = structure(function#  False Discovery Rate
88
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89
##author<< Gael Yvert,
90
(
91
x, ##<< A vector x of independent p-values.
92
FDR ##<< The specified FDR.
93
) {
94
  x <- sort(na.omit(x))
95
  N = length(x)
96
  i = 1;
97
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98
  if (i == 1)
99
    return (NA)
100
  else
101
    return( x[i-1] )
102
### Return the the corresponding cutoff.
103
}, ex=function(){
104
  print("example")
105
})
106

    
107

    
108

    
109
lod_score_vecs = structure(function # Likelihood ratio
110
### Compute the likelihood log of two set of value from two models Vs. a unique model.
111
(
112
x ,##<< First vector.
113
y ##<< Second vector.
114
) {
115
	if (length(x) <=1 | length(y) <= 1) {
116
		return(NA)
117
	}
118
  meanX = mean(x)
119
  sdX = sd(x)
120
  meanY = mean(y)
121
  sdY = sd(y)
122
  meanXY = mean(c(x,y))
123
  sdXY = sd(c(x,y))
124
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127
  ratio = llX + llY - llXY
128
  return(ratio)
129
### Returns the likelihood ratio.
130
}, ex=function(){
131
  # LOD score for 2 set of values
132
  mean1=5; sd1=2; card2 = 250
133
  mean2=6; sd2=3; card1 = 200
134
  x1 = rnorm(card1, mean1, sd1)
135
  x2 = rnorm(card2, mean2, sd2)
136
  min = floor(min(c(x1,x2)))
137
  max = ceiling(max(c(x1,x2)))
138
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
139
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142
  lod_score_vecs(x1,x2)
143
 })
144

    
145
dfadd = structure(function# Adding list to a dataframe.
146
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
147
	(df, ##<<  A dataframe
148
		l ##<<  A list
149
	) {
150
  if (is.null(df)) {
151
    df = data.frame(l,stringsAsFactors=FALSE)
152
  } else {
153
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
154
  }
155
  return(df)
156
### Return the dataframe \emph{df}.
157
}, ex=function(){
158
		## Here dataframe is NULL
159
		print(df)
160
		df = NULL
161

    
162
		# Initialize df
163
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
164
		print(df)
165

    
166
		# Adding elements to df
167
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
168
		print(df)
169
})
170

    
171

    
172
sign_from_strand = function(
173
### Get the sign of strand
174
strands) {
175
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
176
### If strand in forward then returns 1 else returns -1
177
}
178

    
179
flat_reads = function(
180
### Extract reads coordinates from TempleteFilter input sequence
181
reads, ##<< TemplateFilter input reads
182
nuc_width ##<< Width used to shift F and R reads.
183
) {
184
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
185
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
186
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
187
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
188
### Returns a list of F reads, R reads and joint/shifted F and R reads.
189
}
190

    
191

    
192

    
193
filter_tf_outputs = function(# Filter TemplateFilter outputs
194
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold.
195
tf_outputs, ##<< TemplateFilter outputs.
196
chr, ##<< Chromosome observed, here chr is an integer.
197
x_min, ##<< Coordinate of the first bp observed.
198
x_max, ##<< Coordinate of the last bp observed.
199
nuc_width = 160, ##<< Nucleosome width.
200
ol_bp = 59, ##<< Overlap Threshold.
201
corr_thres = 0.5 ##<< Correlation threshold.
202
) {
203
  if (x_min < 0) {
204
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
205
	} else {
206
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
207
  }
208
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]
211
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]
212
  i = 1
213
  while (i <= length(tf_outputs[,1])) {
214
    lb = tf_outputs[i,]$low
215
    ub = tf_outputs[i,]$up
216
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
217
    i = i+1
218
  }
219
  return(tf_outputs)
220
### Returns filtered TemplateFilter Outputs
221
}
222

    
223

    
224

    
225
filter_tf_inputs = function(# Filter TemplateFilter inputs
226
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
227
inputs, ##<< TF inputs to be filtered.
228
chr, ##<< Chromosome observed, here chr is an integer.
229
x_min, ##<< Coordinate of the first bp observed.
230
x_max, ##<< Coordinate of the last bp observed.
231
nuc_width = 160, ##<< Nucleosome width.
232
only_f = FALSE, ##<< Filter only F reads.
233
only_r = FALSE, ##<< Filter only R reads.
234
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
235
) {
236
	if (only_f) {
237
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
238
	} else if (only_r) {
239
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
240
	} else {
241
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
242
	}
243
  if (!filter_for_coverage) {
244
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
245
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
246
  }
247
	return(inputs)
248
### Returns filtred inputs.
249
}
250

    
251

    
252

    
253
get_comp_strand = function(
254
### Compute the complementatry strand.
255
strand ##<< The original strand.
256
) {
257
	apply(t(strand),2, function(n){
258
	  if (n=="a") {return("t")}
259
		if (n=="t") {return("a")}
260
		if (n=="c") {return("g")}
261
		if (n=="g") {return("c")}
262
	})
263
### Returns the complementatry strand.
264
}
265

    
266

    
267
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
268
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270
lod_thres=20, ##<< Log likelihood ration threshold.
271
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272
){
273
	end_of_tracks = function(tracks) {
274
		if (length(tracks) == 0) {
275
			return(TRUE)
276
		}
277
	  for (lower_bound in tracks) {
278
			if(!is.na(lower_bound)) {
279
	      if (lower_bound < coord_max) {
280
	        return(FALSE)
281
	      }
282
	  	}
283
	  }
284
	  return(TRUE)
285
	}
286
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
287
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
288
			new_cluster$wp = TRUE
289
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
290
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
291
		  	clusters[[length(clusters) + 1]] = new_cluster
292
				# print(new_cluster)
293
		  }
294
		} else {
295
			new_cluster$wp = FALSE
296
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
297
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
298
			  clusters[[length(clusters) + 1]] = new_cluster
299
			}
300
		}
301
		return(clusters)
302
	}
303
	strain = samples[[1]]$strain
304
	lod_scores = c()
305
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
306
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
307
  # compute clusters
308
  clusters = list()
309
  cluster_contents = list()
310
  # Init reader
311
  indexes = c()
312
  track_readers = c()
313
  current_nuc = NULL
314
	lod_score = lod_thres + 1
315
  # Read nucs from TF outputs
316
  tf_outs = list()
317
	i = 1
318
  for (sample in samples) {
319
		# print(sample$roi$chr)
320
		# print(min_nuc_center)
321
		# print(max_nuc_center)
322
		# print(sample$outputs)
323
		# tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center)
324
		# print(tf_outs[[i]])
325
		tf_outs[[i]] = sample$outputs
326
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
327
    indexes[i] = 1
328
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
329
      track_readers[i] = coord_max
330
	  } else {
331
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
332
		}
333
		i = i+1
334
  }
335
	# print(track_readers)
336
  new_cluster = NULL
337
  nb_nucs_in_cluster = 0
338
  nuc_from_track = c()
339
  for (i in 1:length(tf_outs)){
340
    nuc_from_track[i] = FALSE
341
  }
342
  # Start clustering
343
  while (!end_of_tracks(track_readers)) {
344
    new_center = min(track_readers)
345
		current_track = which(track_readers == new_center)[1]
346
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
347
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
348
		new_nuc$inputs = samples[[current_track]]$inputs
349
		new_nuc$chr = samples[[current_track]]$roi$chr
350
		new_nuc$track = current_track
351

    
352
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
353
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
354
		new_nuc$original_reads = flatted_reads[[3]]
355

    
356
    new_upper_bound = new_nuc$upper_bound
357

    
358
    if (!is.null(current_nuc)) {
359
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
360
			lod_scores = c(lod_scores,lod_score)
361
		}
362
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363
		if (is.na(lod_score)) {
364
			lod_score = lod_thres + 1
365
		}
366
		# Store lod_score
367
		new_nuc$lod_score = lod_score
368
	  if (lod_score < lod_thres) {
369
      # aggregate to current cluster
370
      #   update bound
371
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
372
        new_cluster$upper_bound = new_nuc$upper_bound
373
      }
374
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
375
        new_cluster$lower_bound = new_nuc$lower_bound
376
      }
377
      #   add nucleosome to current cluster
378
      nuc_from_track[current_track] = TRUE
379
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
380
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
381
    } else {
382
			if (!is.null(new_cluster)) {
383
        # store old cluster
384
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
385
			}
386
      # Reinit current cluster composition stuff
387
      nb_nucs_in_cluster = 0
388
      nuc_from_track = c()
389
      for (i in 1:length(tf_outs)){
390
        nuc_from_track[i] = FALSE
391
      }
392
      # create new cluster
393
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
394
      # update upper bound
395
      current_upper_bound = new_upper_bound
396
      # add nucleosome to current cluster
397
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
398
      nuc_from_track[current_track] = TRUE
399
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
400

    
401
		}
402

    
403
		current_nuc = new_nuc
404

    
405
    # update indexes
406
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
407
      indexes[current_track] = indexes[current_track] + 1
408
      # update track
409
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
410
    } else {
411
      # update track
412
      track_readers[current_track] = coord_max
413
    }
414
  }
415
  # store last cluster
416
  if (!is.null(new_cluster)) {
417
    # store old cluster
418
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
419
  }
420
	return(list(clusters, lod_scores))
421
### Returns a list of clusterized nucleosomes, and all computed lod scores.
422
}, ex=function(){
423
	# Dealing with a region of interest
424
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
425
	samples = list()
426
	for (i in 1:3) {
427
		# Create TF output
428
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
429
		outputs = dfadd(NULL,tf_nuc)
430
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
431
		# Generate corresponding reads
432
		nb_reads = round(runif(1,170,230))
433
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
434
		u_reads = sort(unique(reads))
435
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
436
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
437
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
438
		u_reads = u_reads + shifts
439
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
440
		                         "V2" = u_reads,
441
														 "V3" = strands,
442
														 "V4" = counts), stringsAsFactors=FALSE)
443
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
444
	}
445
	print(aggregate_intra_strain_nucs(samples))
446
})
447

    
448
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
449
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
450
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
451
cur_index ##<< the index of the roi involved
452
) {
453
	if  (length(partial_strain_maps) == 0 ){
454
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
455
    tmp_strain_maps = list()
456
	} else {
457
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
458
			tmp_nuc = partial_strain_maps[[i]]
459
			tmp_nuc_as_list = list()
460
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
461
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
462
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
463
			tmp_nuc_as_list[["cur_index"]] = cur_index
464
			tmp_nuc_as_list[["index_nuc"]] = i
465
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
466
			all_original_reads = c()
467
			for (j in 1:length(tmp_nuc$nucs)) {
468
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
469
			}
470
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
471
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
472
			if (tmp_nuc$wp) {
473
				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
474
				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
475
			} else {
476
				tmp_nuc_as_list[["lod_1"]] = NA
477
				tmp_nuc_as_list[["lod_2"]] = NA
478
			}
479
      return(tmp_nuc_as_list)
480
    })
481
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
482
	}
483
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
484
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
485
}
486

    
487
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
488
### This function aligns nucs between two strains for a given genome region.
489
replicates, ##<< Set of replicates, ideally 3 per strain.
490
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
491
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
492
corr_thres=0.5, ##<< Correlation threshold.
493
lod_thres=100, ##<< LOD cut off.
494
config=NULL, ##<< GLOBAL config variable
495
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
496
) {
497
	if (length(replicates) < 2) {
498
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
499
	} else if (length(replicates) > 2) {
500
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
501
	}
502
	common_nuc = NULL
503
	lod_scores = c()
504
	chr = replicates[[1]][[1]]$roi$chr
505
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
506
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
507
	strain_ref1 = replicates[[1]][[1]]$strain
508
	strain_ref2 = replicates[[2]][[1]]$strain
509
	big_cur = replicates[[1]][[1]]$roi
510
  orig_big_cur = replicates[[1]][[1]]$orig_roi
511
	if(big_cur$end - big_cur$begin < 0) {
512
		tmp_begin = big_cur$begin
513
		big_cur$begin =  big_cur$end
514
		big_cur$end =  tmp_begin
515
	}
516
	# GO!
517
	if (is.null(wp_nucs_strain_ref1)) {
518
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
519
	}
520
	if (is.null(wp_nucs_strain_ref2)) {
521
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
522
  }
523
	lws = c()
524
	ups = c()
525
	for (na in wp_nucs_strain_ref2) {
526
		lws = c(lws, na$lower_bound)
527
		ups = c(ups, na$upper_bound)
528
	}
529

    
530
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
531
	roi_strain_ref1 = NULL
532
	roi_strain_ref2 = NULL
533
	if (length(wp_nucs_strain_ref1) > 0) {
534
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
535
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
536
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
537
			# Filtering on Well Positionned
538
			if (nuc_strain_ref1$wp) {
539
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
540
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
541
        if (!is.null(roi_strain_ref2)){
542
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
543
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
544
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
545
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
546
					for (index_nuc_strain_ref2 in matching_nas) {
547
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
548
						# Filtering on Well Positionned
549
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
550
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
551
                nuc_strain_ref2$wp) {
552
							# Filtering on correlation Score and collecting reads
553
							SKIP = FALSE
554
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
555
							reads_strain_ref1 = c()
556
							for (nuc in nuc_strain_ref1$nucs){
557
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
558
								if (nuc$corr < corr_thres) {
559
									SKIP = TRUE
560
								}
561
							}
562
							reads_strain_ref2 = c()
563
							for (nuc in nuc_strain_ref2$nucs){
564
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
565
								if (nuc$corr < corr_thres) {
566
									SKIP = TRUE
567
								}
568
							}
569
							# Filtering on correlation Score
570
							if (!SKIP) {
571
								# tranlation of reads into strain 2 coords
572
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
573
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
574
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
575
								lod_scores = c(lod_scores, lod_score)
576
								# Filtering on LOD Score
577
								if (lod_score < lod_thres) {
578
									tmp_nuc = list()
579
									# strain_ref1
580
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
581
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
582
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
583
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
584
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
585
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
586
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
587
									# tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5)
588
									# tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5)
589
									# tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5)
590
									# strain_ref2
591
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
592
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
593
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
594
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
595
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
596
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
597
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
598
									# tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5)
599
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
600
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
601
									# common
602
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
603
									# print(tmp_nuc)
604
									common_nuc = dfadd(common_nuc, tmp_nuc)
605
								}
606
							}
607
						}
608
					}
609
				} else {
610
		      print("WARNING! No roi for strain ref 2.")
611
			  }
612
		  }
613
		}
614

    
615
		if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
616
			index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
617
			to_remove_list = c()
618
			for (i in 1:length(index_redundant)) {
619
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
620
				  to_remove = index_redundant[i]
621
				}	 else {
622
					to_remove = index_redundant[i] + 1
623
			  }
624
				to_remove_list = c(to_remove_list, to_remove)
625
			}
626
			common_nuc = common_nuc[-to_remove_list,]
627
		}
628

    
629
		if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
630
			index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
631
			to_remove_list = c()
632
			for (i in 1:length(index_redundant)) {
633
				if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
634
				  to_remove = index_redundant[i]
635
				}	 else {
636
					to_remove = index_redundant[i] + 1
637
			  }
638
				to_remove_list = c(to_remove_list, to_remove)
639
			}
640
			common_nuc = common_nuc[-to_remove_list,]
641
		}
642

    
643
		return(list(common_nuc, lod_scores))
644
	} else {
645
		print("WARNING, no nucs for strain_ref1.")
646
		return(NULL)
647
	}
648
### Returns a list of clusterized nucleosomes, and all computed lod scores.
649
}, ex=function(){
650

    
651
    # Define new translate_cur function...
652
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
653
      return(roi)
654
    }
655
    # Binding it by uncomment follwing lines.
656
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
657
    unlockBinding("translate_cur", getNamespace("nucleominer"))
658
    assign("translate_cur", translate_cur, "package:nucleominer")
659
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
660
    lockBinding("translate_cur", getNamespace("nucleominer"))
661
    lockBinding("translate_cur", as.environment("package:nucleominer"))
662

    
663
	# Dealing with a region of interest
664
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
665
	roi2 = translate_cur(roi, roi$strain_ref1)
666
	replicates = list()
667
	for (j in 1:2) {
668
		samples = list()
669
		for (i in 1:3) {
670
			# Create TF output
671
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
672
			outputs = dfadd(NULL,tf_nuc)
673
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
674
			# Generate corresponding reads
675
			nb_reads = round(runif(1,170,230))
676
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
677
			u_reads = sort(unique(reads))
678
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
679
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
680
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
681
			u_reads = u_reads + shifts
682
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
683
			                         "V2" = u_reads,
684
															 "V3" = strands,
685
															 "V4" = counts), stringsAsFactors=FALSE)
686
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
687
		}
688
		replicates[[length(replicates) + 1]] = samples
689
	}
690
	print(align_inter_strain_nucs(replicates))
691
})
692

    
693

    
694

    
695

    
696

    
697

    
698

    
699

    
700

    
701

    
702

    
703

    
704

    
705

    
706

    
707

    
708

    
709

    
710

    
711

    
712

    
713

    
714

    
715

    
716

    
717

    
718
fetch_mnase_replicates = function(# Prefetch data
719
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
720
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
721
roi, ##<< Region of interest.
722
all_samples, ##<< Global list of samples.
723
config=NULL, ##<< GLOBAL config variable
724
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
725
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
726
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
727
) {
728
	samples=list()
729
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
730
	for (i in samples_ids) {
731
		sample = as.list(all_samples[all_samples$id==i,])
732
    sample$orig_roi = roi
733
    sample$roi = translate_cur(roi, sample$strain, config = config)
734
		if (get_genome) {
735
			# Get Genome
736
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
737
		}
738
		# Get inputs
739
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
740
		sample$total_reads = sum(sample$inputs[,4])
741
		if (!only_fetch) {
742
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
743
	  }
744
	  # Get TF outputs for Mnase_Seq samples
745
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
746
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
747
			if (!only_fetch) {
748
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
749
  		}
750
		}
751
		samples[[length(samples) + 1]] = sample
752
	}
753
  return(samples)
754
}
755

    
756
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
757
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
758
region1, ##<< Original regions.
759
region2 ##<< Regions to substract.
760
) {
761
  rec_substract_region = function(region1, region2) {
762
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
763
    cur_fuzzy = region1[i,]
764
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
765
    if (length(inter_wp[,1]) > 0) {
766
      ret = c()
767
      for (j in 1:length(inter_wp[,1])) {
768
        cur_wp = inter_wp[j,]
769
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
770
          # remove cur_fuzzy
771
          ret = c()
772
          break
773
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
774
          # crop fuzzy
775
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
776
          ret = cur_fuzzy
777
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
778
          # crop fuzzy
779
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
780
          ret = cur_fuzzy
781
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
782
          # split fuzzy
783
          tmp_ret_fuzzy_1 = cur_fuzzy
784
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
785
          tmp_ret_fuzzy_2 = cur_fuzzy
786
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
787
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
788
          # print(ret)
789
          # ret = cur_fuzzy
790
          break
791
        } else {
792
          stop("WARNING NO ADAPTED CASE!")
793
        }
794
      }
795
      return(ret)
796
    } else {
797
      return(cur_fuzzy)
798
    }
799
  })
800
  }
801
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
802
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
803
  tmp_ulist = unlist(non_inter_fuzzy)
804
  tmp_names = names(tmp_ulist)[1:4]
805
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
806
  names(non_inter_fuzzy) = tmp_names
807
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
808
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
809
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
810
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
811
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
812
  return(non_inter_fuzzy)
813
}
814

    
815
union_regions = function(# Aggregate regions that intersect themnselves.
816
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
817
regions ##<< The Regions to be aggregated
818
) {
819
  if (is.null(regions)) {return(regions)}
820
  if (nrow(regions) == 0) {return(regions)}
821
  old_length = length(regions[,1])
822
  new_length = 0
823

    
824
  while (old_length != new_length) {
825
    regions = regions[order(regions$lower_bound), ]
826
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
827
    vec_end_1 = which(regions$stop)
828
    if (length(vec_end_1) == 0) {
829
      vec_end_1 = c(length(regions$stop))
830
    }
831
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
832
      vec_end_1 = c(vec_end_1, length(regions$stop))
833
    }
834
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
835
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
836
      chr = regions$chr[vec_beg_1[i]]
837
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
838
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
839
      cur_index = regions$cur_index[vec_beg_1[i]]
840
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
841
      })
842
    union = collapse_regions(union)
843
    old_length = length(regions[,1])
844
    new_length = length(union[,1])
845
    regions = union
846
  }
847
  return(union)
848
}
849

    
850
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
851
# ### It is based on common wp nucs index on nucs and region.
852
# strain_maps, ##<< Nuc maps.
853
# cur_index, ##<< The region of interest index.
854
# tmp_common_nucs, ##<< the list of wp nucs.
855
# strain##<< The strain to consider.
856
# ){
857
#   fuzzy_nucs = strain_maps[[strain]]
858
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
859
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
860
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
861
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
862
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
863
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
864
#   return(fuzzy_nucs)
865
# }
866

    
867
translate_regions = function(# Translate a list of regions from a strain ref to another.
868
### This function is an eloborated call to translate_cur.
869
regions, ##<< Regions to be translated.
870
combi, ##<< Combination of strains.
871
cur_index, ##<< The region of interest index.
872
config=NULL, ##<< GLOBAL config variable
873
roi ##<< The region of interest.
874
) {
875
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
876
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
877
    big_cur =  roi
878
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
879
    if (is.null(trs_tmp_regions_ref2)) {
880
      return(NULL)
881
    } else {
882
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
883
    }
884
  })
885

    
886
  return(collapse_regions(tr_regions))
887
}
888

    
889
collapse_regions = function(# reformat an "apply  manipulated" list of regions
890
### Utils to reformat an "apply  manipulated" list of regions
891
regions ##< a list of regions
892
) {
893
  if (is.null(regions)) {
894
    return(NULL)
895
  } else {
896
    regions = do.call(rbind, regions)
897
    regions$chr = as.character(regions$chr)
898
    regions$chr = as.numeric(regions$chr)
899
    regions$lower_bound = as.numeric(regions$lower_bound)
900
    regions$upper_bound = as.numeric(regions$upper_bound)
901
    regions = regions[order(regions$lower_bound),]
902
    return(regions)
903
  }
904
}
905

    
906

    
907
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
908
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
909
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
910
roi, ##<< The region of interest.
911
strain, ##<< The strain to consider.
912
config=NULL ##<< GLOBAL config variable
913
) {
914
  tr_roi = translate_cur(roi, strain, config = config)
915
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
916
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
917
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
918
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
919
  }
920
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
921
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
922
  }
923
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
924
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
925
  }
926
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
927
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
928
  }
929
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
930
  return(tmp_fuzzy_nucs)
931
}
932

    
933
get_all_reads = function(# Retrieve Reads
934
### Retrieve reads for a given marker, combi, form.
935
marker, ##<< The marker to considere.
936
combi, ##<< The starin combination to considere.
937
form="wp", ##<< The nuc form to considere.
938
config=NULL ##<< GLOBAL config variable
939
) {
940
	all_reads = NULL
941
  for (manip in c("Mnase_Seq", marker)) {
942
    if (form == "unr") {
943
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
944
  		tmp_res = read.table(file=out_filename, header=TRUE)
945
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
946
      tmp_res$form = form
947
    } else if (form == "wp") {
948
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
949
  		tmp_res = read.table(file=out_filename, header=TRUE)
950
      tmp_res$form = form
951
    } else if (form == "wpunr") {
952
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
953
  		tmp_res = read.table(file=out_filename, header=TRUE)
954
      tmp_res$form = "wp"
955
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
956
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
957
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
958
      tmp_res2$form = "unr"
959
      tmp_res = rbind(tmp_res, tmp_res2)
960
    }
961
		if (is.null(all_reads)) {
962
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
963
		}
964
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
965
		all_reads = cbind(all_reads, tmp_res)
966
  }
967
  return(all_reads)
968
}
969

    
970
get_design = function(# Build the design for deseq
971
### This function build the design according sample properties.
972
marker, ##<< The marker to considere.
973
combi, ##<< The starin combination to considere.
974
all_samples ##<< Global list of samples.
975
) {
976
  off1 = 0
977
  off2 = 0
978
	manips = c("Mnase_Seq", marker)
979
	design_rownames = c()
980
	design_manip = c()
981
	design_strain = c()
982
  off2index = function(off) {
983
  	switch(toString(off),
984
  		"1"=c(0,1,1),
985
  	  "2"=c(1,0,1),
986
    	"3"=c(1,1,0),
987
  		c(1,1,1)
988
  		)
989
  }
990
	for (manip in manips) {
991
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
992
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
993
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
994
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
995
		} else {
996
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
997
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
998
			}
999
		}
1000
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
1001
		for (strain in combi) {
1002
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1003
			design_strain = c(design_strain, rep(strain, length(cols)))
1004
			design_rownames = c(design_rownames, cols)
1005
		}
1006
	}
1007
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1008
	return(snep_design)
1009
}
1010

    
1011
plot_dist_samples = function(# Plot the distribution of reads.
1012
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1013
strain, ##<< The strain to considere.
1014
marker, ##<< The marker to considere.
1015
res, ##<< Data
1016
all_samples, ##<< Global list of samples.
1017
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1018
) {
1019
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1020
	snepCountTable = res[,cols]
1021
	snepDesign = data.frame(
1022
		row.names = cols,
1023
		manip = rep(marker, length(cols)),
1024
		strain = rep(strain, length(cols))
1025
		)
1026
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1027
	sizeFactors = estimateSizeFactors(cdsFull)
1028
	# print(sizeFactors[[1]])
1029
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1030
	if (NEWPLOT) {
1031
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1032
		NEWPLOT = FALSE
1033
	}
1034
	for (it in 1:length(sample_ids)) {
1035
		sample_id = sample_ids[it]
1036
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1037
	}
1038
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1039
}
1040

    
1041
analyse_design = function(# Launch deseq methods.
1042
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1043
snep_design, ##<< The design to considere.
1044
reads ##<< The data to considere.
1045
) {
1046
	snep_count_table = reads[, rownames(snep_design)]
1047
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1048
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1049
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1050

    
1051
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1052
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1053

    
1054
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1055
	return(list(fit1, fit0, snep_design, pvalsGLM))
1056
}
1057

    
1058

    
1059

    
1060

    
1061

    
1062

    
1063

    
1064

    
1065
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1066
### This function uses
1067
marker, ##<< The marker involved.
1068
combi, ##<< The strain combination involved.
1069
form, ##<< the nuc form involved.
1070
all_samples, ##<< Global list of samples.
1071
config=NULL ##<< GLOBAL config variable
1072
) {
1073
  # PRETREAT
1074
  snep_design = get_design(marker, combi, all_samples)
1075
  reads = get_all_reads(marker, combi, form, config=config)
1076
  # RUN ANALYSE
1077
  tmp_analyse = analyse_design(snep_design, reads)
1078
  # RESULTS
1079
	fit1 = tmp_analyse[[1]]
1080
	fit0 = tmp_analyse[[2]]
1081
  k = names(fit1)
1082
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1083
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1084
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1085
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1086
	snep_design = tmp_analyse[[3]]
1087
  # print(snep_design)
1088
	fdr = 0.0001
1089
	thres = FDR(reads$pvalsGLM, fdr)
1090
	reads$snep_index = reads$pvalsGLM < thres
1091
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1092
  return(reads)
1093
  },  ex=function(){
1094
    marker = "H3K4me1"
1095
    combi = c("BY", "YJM")
1096
    form = "wpunr" # "wp" | "unr" | "wpunr"
1097
    # foo = get_sneps(marker, combi, form)
1098
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1099
})
1100

    
1101

    
1102

    
1103

    
1104

    
1105

    
1106

    
1107

    
1108

    
1109

    
1110

    
1111

    
1112

    
1113

    
1114

    
1115

    
1116

    
1117

    
1118

    
1119

    
1120

    
1121

    
1122

    
1123

    
1124
ROM2ARAB = function(# Roman to Arabic pair list.
1125
### Util to convert Roman to Arabic
1126
){list(
1127
  "I" = 1,
1128
  "II" = 2,
1129
  "III" = 3,
1130
  "IV" = 4,
1131
  "V" = 5,
1132
  "VI" = 6,
1133
  "VII" = 7,
1134
  "VIII" = 8,
1135
  "IX" = 9,
1136
  "X" = 10,
1137
  "XI" = 11,
1138
  "XII" = 12,
1139
  "XIII" = 13,
1140
  "XIV" = 14,
1141
  "XV" = 15,
1142
  "XVI" = 16,
1143
  "XVII" = 17,
1144
  "XVIII" = 18,
1145
  "XIX" = 19,
1146
  "XX" = 20
1147
)}
1148

    
1149
switch_pairlist = structure(function(# Switch a pairlist
1150
### Take a pairlist key:value and return the switched pairlist value:key.
1151
l ##<< The pairlist to switch.
1152
) {
1153
	ret = list()
1154
	for (name in names(l)) {
1155
		ret[[as.character(l[[name]])]] = name
1156
	}
1157
	ret
1158
### The switched pairlist.
1159
}, ex=function(){
1160
	l = list(key1 = "value1", key2 = "value2")
1161
	print(switch_pairlist(l))
1162
})
1163

    
1164
ARAB2ROM = function(# Arabic to Roman pair list.
1165
### Util to convert Arabicto Roman
1166
){switch_pairlist(ROM2ARAB())}
1167

    
1168

    
1169

    
1170

    
1171
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1172
### This fonction allow to acces to a specific part of the c2c file.
1173
strain1, ##<< the key strain
1174
strain2, ##<< the target strain
1175
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1176
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1177
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1178
config=NULL##<<  GLOBAL config variable
1179
) {
1180
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1181
	# Launch c2c file
1182
	if (reverse) {
1183
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1184
	} else {
1185
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1186
	}
1187
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1188
  # Filtering unagapped
1189
  c2c = c2c[c2c$V6=="-",]
1190
	# Reverse
1191
	if (reverse) {
1192
		tmp_col = c2c$V1
1193
		c2c$V1 = c2c$V7
1194
		c2c$V7 = tmp_col
1195
		tmp_col = c2c$V2
1196
		c2c$V2 = c2c$V9
1197
		c2c$V9 = tmp_col
1198
		tmp_col = c2c$V3
1199
		c2c$V3 = c2c$V10
1200
		c2c$V10 = tmp_col
1201
	}
1202
  if (!is.null(chr)) {
1203
  	if (strain1 == "BY") {
1204
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1205
  	} else if (strain1 == "RM") {
1206
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1207
  	} else if (strain1 == "YJM") {
1208
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1209
  	}
1210
  	c2c = c2c[c2c$V1 == chro_1,]
1211
    if (!is.null(lower_bound)) {
1212
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1213
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1214
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1215
    }
1216
    if (!is.null(upper_bound)) {
1217
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1218
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1219
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1220
    }
1221
  }
1222
  return(c2c)
1223
# It retruns the appropriate c2c file part.
1224
}
1225

    
1226
translate_cur = structure(function(# Translate coords of a genome region.
1227
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1228
roi, ##<< Original genome region of interest.
1229
strain2, ##<< The strain in wich you want the genome region of interest.
1230
config=NULL, ##<< GLOBAL config variable
1231
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1232
) {
1233
	strain1 = roi$strain_ref
1234
	if (strain1 == strain2) {
1235
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1236
		return(roi)
1237
	}
1238

    
1239
	# Extract c2c file
1240
	if (!is.null(big_cur)) {
1241
  	# Dealing with big_cur
1242
		if (roi$strain_ref != big_cur$strain_ref) {
1243
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1244
    }
1245
    if (big_cur$end < big_cur$begin) {
1246
      tmp_var = big_cur$begin
1247
      big_cur$begin = big_cur$end
1248
      big_cur$end = tmp_var
1249
      big_cur$length = big_cur$end - big_cur$begin + 1
1250
    }
1251
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1252
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1253
      return(NULL)
1254
    }
1255
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1256
  } else {
1257
    # No big_cur
1258
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1259
  }
1260
  
1261
  #	Convert initial roi$chr into c2c format
1262
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1263
	begin_1 = roi$begin
1264
  end_1 = roi$end
1265
  if (reverse) {
1266
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1267
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1268
	} else {
1269
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1270
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1271
	}
1272

    
1273
	# Never happend conditions ...
1274
	{
1275
		if (length(tmptransfostart$V8) == 0) {
1276
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1277
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1278
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1279
  			begin_1 = min(tmp_c2c$V2)
1280
      } else {
1281
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1282
  			begin_1 = min(tmp_c2c$V3)
1283
      }
1284
			if (reverse) {
1285
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1286
			} else {
1287
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1288
			}
1289
			if (length(tmptransfostart$V8) == 0) {
1290
				if (!is.null(big_cur)) {
1291
					return(NULL)
1292
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1293
				} else {
1294
					print(tmptransfostart)
1295
					print(tmptransfostop)
1296
					stop("Never happend condition 1.")
1297
				}
1298
			}
1299
		}
1300
		if (length(tmptransfostop$V8) == 0) {
1301
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1302
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1303
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1304
  			end_1 = max(tmp_c2c$V3)
1305
      } else {
1306
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1307
  			end_1 = max(tmp_c2c$V2)
1308
      }
1309
			if (reverse) {
1310
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1311
			} else {
1312
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1313
			}
1314
			if (length(tmptransfostop$V8) == 0) {
1315
				if (!is.null(big_cur)) {
1316
					return(NULL)
1317
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1318
				} else {
1319
					print(tmptransfostart)
1320
					print(tmptransfostop)
1321
					stop("Never happend condition 2.")
1322
				}
1323
			}
1324
		}
1325
		if (length(tmptransfostart$V8) != 1) {
1326
			# print("many start")
1327
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1328
			if (length(tmptransfostart$V8) != 1) {
1329
				print(tmptransfostart)
1330
				print(tmptransfostop)
1331
  			stop("Never happend condition 3.")
1332
			}
1333
		}
1334
		if (length(tmptransfostop$V8) != 1) {
1335
			# print("many stop")
1336
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1337
			if (length(tmptransfostop$V8) != 1) {
1338
				print(tmptransfostart)
1339
				print(tmptransfostop)
1340
  			stop("Never happend condition 4.")
1341
			}
1342
		}
1343
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1344
			print(tmptransfostart)
1345
			print(tmptransfostop)
1346
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1347
		}
1348
	}
1349
  
1350
  # Deal with strand
1351
  if (tmptransfostart$V8 == 1) {
1352
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1353
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1354
  } else {
1355
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1356
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1357
  }
1358
	# Build returned roi
1359
	roi$strain_ref = strain2
1360
	if (roi$strain_ref == "BY") {
1361
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1362
	} else {
1363
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1364
	}
1365
  roi$begin = begin_2
1366
  roi$end = end_2
1367
	if (sign(roi$end - roi$begin) == 0) {
1368
		roi$length = 1
1369
	} else {
1370
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1371
	}
1372
  return(roi)
1373
}, ex=function(){
1374
	# Define new translate_cur function...
1375
	translate_cur = function(roi, strain2, config) {
1376
		strain1 = roi$strain_ref
1377
		if (strain1 == strain2) {
1378
			return(roi)
1379
		} else {
1380
		  stop("Here is my new translate_cur function...")
1381
		}
1382
	}
1383
	# Binding it by uncomment follwing lines.
1384
	# unlockBinding("translate_cur", as.environment("package:nm"))
1385
	# unlockBinding("translate_cur", getNamespace("nm"))
1386
	# assign("translate_cur", translate_cur, "package:nm")
1387
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1388
	# lockBinding("translate_cur", getNamespace("nm"))
1389
	# lockBinding("translate_cur", as.environment("package:nm"))
1390
})
1391

    
1392

    
1393
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1394
### CURs are regions that can be aligned between the genomes
1395
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1396
min_cur_width = 4000, ##<< The minimum width of a CUR
1397
config = NULL ##<< GLOBAL config variable
1398
) {
1399

    
1400
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1401
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1402
    check_homogeneity(c2c)
1403
  	if (length(c2c[,1]) == 0 ) {
1404
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1405
    } else {
1406
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1407
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1408
  		tmp_index = order(lower_bounds)
1409
      lower_bounds = lower_bounds[tmp_index]
1410
      upper_bounds = upper_bounds[tmp_index]
1411
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1412
      ov_index = which(tmp_diff < 0)
1413
      if(length(ov_index < 0) !=0 ) {
1414
        ov_index = ov_index[1]
1415
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1416
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1417
        print(c2c_corrupted)
1418
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1419
      }
1420
      return(NULL)
1421
    }
1422
  }
1423

    
1424
  check_homogeneity = function(sub_c2c) {
1425
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1426
    tmp_signs = tmp_signs[tmp_signs != 0]
1427
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1428
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1429
      print(tmp_signs)
1430
  	}
1431
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1432
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1433
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1434
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1435
      print(tmp_signs2)
1436
  	}
1437
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1438
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1439
  	}
1440
  }
1441

    
1442
  test_and_squeeze_rois = function(foo, config=NULL) {
1443
    is_it_ok = function(list1, list2) {
1444
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1445
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1446
      if (!ok) {
1447
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1448
      }
1449
      return (ok)
1450
    }
1451
    squeeze_rois = function(list1, list2) {
1452
      rois = apply(t(1:nrow(list1)), 2, function(i){
1453
        roi = list1[i,]
1454
        roi2 = list2[i,]
1455
        roi$begin = max(roi$begin, roi2$begin)
1456
        roi$end = min(roi$end, roi2$end)
1457
        roi$length =  roi$end - roi$begin + 1
1458
        return(roi)
1459
      })
1460
      return(do.call(rbind, rois))
1461
    }
1462
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1463
    # foo = foo_orig
1464
    STOP = FALSE
1465
    nb_round = 0
1466
    while(!STOP) {
1467
      nb_round = nb_round + 1
1468
      print(paste("2-2 round #", nb_round, sep=""))
1469
      fooby = translate_curs(foo, "BY", config=config)
1470
      fooyjm = translate_curs(foo, "YJM", config=config)
1471
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1472
      if (!is_it_ok(fooby, fooyjmby)) {
1473
        print("case 1")
1474
        foo = squeeze_rois(fooby, fooyjmby)
1475
    		next
1476
      }
1477
      foorm = translate_curs(foo, "RM", config=config)
1478
      foormby = translate_curs(foorm, "BY", config=config)
1479
      if (!is_it_ok(fooby, foormby)) {
1480
        print("case 2")
1481
        foo = squeeze_rois(fooby, foormby)
1482
    		next
1483
      }
1484
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1485
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1486
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1487
        print("case 3")
1488
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1489
        next
1490
      }
1491
      foormyjm = translate_curs(foorm, "YJM", config=config)
1492
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1493
      if (!is_it_ok(foorm, foormyjmrm)) {
1494
        print("case 4")
1495
        foo = squeeze_rois(foorm, foormyjmrm)
1496
        next
1497
      }
1498
      foo = translate_curs(foo, "BY", config=config)
1499
      STOP = TRUE
1500
    }
1501
    STOP = FALSE
1502
    nb_round = 0
1503
    while(!STOP) {
1504
      nb_round = nb_round + 1
1505
      print(paste("3-3 round #", nb_round, sep=""))
1506
      fooby = translate_curs(foo, "BY", config=config)
1507
      foobyrm = translate_curs(fooby, "RM", config=config)
1508
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1509
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1510
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1511
        print("case 1")
1512
        foo = squeeze_rois(fooby, foobyrmyjmby)
1513
      }
1514
      fooby = translate_curs(foo, "BY", config=config)
1515
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1516
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1517
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1518
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1519
        print("case 2")
1520
        foo = squeeze_rois(fooby, foobyyjmrmby)
1521
        next
1522
      }
1523
      foo = translate_curs(foo, "BY", config=config)
1524
      STOP = TRUE
1525
    }
1526
    print("end")
1527
    return(foo)
1528
  }
1529

    
1530
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1531
    c2c = c2c_extraction(strain1, strain2, config=config)
1532
    # computing diffs
1533
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1534
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1535
    # Filtering
1536
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1537
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1538
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1539
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1540
      returned_rois = NULL
1541
  		start = indexes_start[i]
1542
  		stop = indexes_stop[i]
1543
  		sub_c2c = c2c[start:stop,]
1544
      check_homogeneity(sub_c2c)
1545
  		if (strain1 == "BY") {
1546
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1547
  		} else {
1548
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1549
  		}
1550
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1551
  		roi$length = roi$end - roi$begin + 1
1552
  		if (roi$length >= min_cur_width) {
1553
  			lower_bound = roi$begin
1554
  			upper_bound = roi$end
1555
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1556
        while(!is.null(check)) {
1557
          # print(check)
1558
      		roi1 = roi
1559
          roi1$end = check[[1]]
1560
      		roi1$length = roi1$end - roi1$begin + 1
1561
      		if (roi1$length >= min_cur_width) {
1562
            returned_rois = dfadd(returned_rois,roi1)
1563
          }
1564
          roi$begin = check[[2]]
1565
      		roi$length = roi$end - roi$begin + 1
1566
      		if (roi$length >= min_cur_width) {
1567
      			lower_bound = min(roi$begin, roi$end)
1568
      			upper_bound = max(roi$begin, roi$end)
1569
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1570
          } else {
1571
            check = NULL
1572
            roi = NULL
1573
          }
1574
        }
1575
        returned_rois = dfadd(returned_rois,roi)
1576
  	  }
1577
    })
1578
    rois = do.call(rbind,rois)
1579
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1580
  	return(rois)
1581
  }
1582

    
1583
  translate_curs = function(rois, target_strain, config) {
1584
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1585
      roi = rois[i,]
1586
      tr_roi = translate_cur(roi, target_strain, config=config)  
1587
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1588
      tmp_end = max(tr_roi$begin, tr_roi$end)
1589
      tr_roi$begin = tmp_begin
1590
      tr_roi$end = tmp_end
1591
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1592
      return(tr_roi)
1593
    })
1594
    tr_rois = do.call(rbind, tr_rois)
1595
    return(tr_rois)
1596
  }
1597

    
1598
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1599
  rois = list()
1600
  for (combi in combis) {
1601
    strain1 = combi[1]
1602
    strain2 = combi[2]
1603
    print(paste(strain1, strain2))
1604
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1605
    strain1 = combi[2]
1606
    strain2 = combi[1]
1607
    print(paste(strain1, strain2))
1608
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1609
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1610
    region1 = rois_fwd
1611
    region2 = tr_rois_rev
1612
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1613
  }
1614
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1615
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1616
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1617
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1618
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1619
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1620
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1621
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1622
  return (squeezed_rois)
1623
}
1624

    
1625
intersect_region = function(# Returns the intersection of 2 list on regions.
1626
### This function...
1627
region1, ##<< Original regions.
1628
region2 ##<< Regions to intersect.
1629
) {
1630
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1631
    roi1 = region1[i, ]
1632
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1633
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1634
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1635
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1636
                                , ]
1637
    if (nrow(sub_regions2) == 0) {
1638
      print("removing a region")
1639
      return(NULL)
1640
    } else if (nrow(sub_regions2) > 1) {
1641
      print("more than one region in intersect_region")          
1642
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1643
    } else {
1644
      roi2 = sub_regions2[1,]
1645
      if (roi1$begin < roi2$begin) {
1646
        print("not the same begin")
1647
        roi1$begin = roi2$begin
1648
        roi1$length =  roi1$end - roi1$begin + 1
1649
      }
1650
      if (roi1$end > roi2$end) {
1651
        print("not the same end")
1652
        roi1$end = roi2$end
1653
        roi1$length =  roi1$end - roi1$begin + 1
1654
      }
1655
      return(roi1)
1656
    }         
1657
  })
1658
  return(do.call(rbind,intersection))
1659
}
1660

    
1661
build_replicates = structure(function(# Stage replicates data
1662
### This function loads in memory data corresponding to the given experiments.
1663
expe, ##<< a list of vector corresponding to vector of replicates.
1664
roi, ##<< the region that we are interested in.
1665
only_fetch=FALSE, ##<< filter or not inputs.
1666
get_genome=FALSE,##<< Load or not corresponding genome.
1667
all_samples, ##<< Global list of samples.
1668
config=NULL ##<< GLOBAL config variable.
1669
) {
1670
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1671
  	samples=list()
1672
  	for (i in samples_ids) {
1673
  		sample = as.list(all_samples[all_samples$id==i,])
1674
      sample$orig_roi = roi
1675
      sample$roi = translate_cur(roi, sample$strain, config = config)
1676
  		if (get_genome) {
1677
  			# Get Genome
1678
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1679
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1680
  		}
1681
  		# Get inputs
1682
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1683
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1684
  		sample$total_reads = sum(sample$inputs[,4])
1685
  		if (!only_fetch) {
1686
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1687
  	  }
1688
  	  # Get TF outputs for Mnase_Seq samples
1689
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1690
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1691
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1692
  			if (!only_fetch) {
1693
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1694
    		}
1695
  		}
1696
  		samples[[length(samples) + 1]] = sample
1697
  	}
1698
  	return(samples)
1699
  }
1700
	replicates = list()
1701
	for(samples_ids in expe) {
1702
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1703
		replicates[[length(replicates) + 1]] = samples
1704
	}
1705
	return(replicates)
1706
  }, ex = function() {
1707
    # library(rjson)
1708
    # library(nucleominer)
1709
    #
1710
    # # Read config file
1711
    # json_conf_file = "nucleo_miner_config.json"
1712
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1713
    # # Read sample file
1714
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1715
    # # here are the sample ids in a list
1716
    # expes = list(c(1))
1717
    # # here is the region that we wnt to see the coverage
1718
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1719
    # # it displays the corverage
1720
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1721
    # out = watch_samples(replicates, config$READ_LENGTH,
1722
    #       plot_coverage = TRUE,
1723
    #       plot_squared_reads = FALSE,
1724
    #       plot_ref_genome = FALSE,
1725
    #       plot_arrow_raw_reads = FALSE,
1726
    #       plot_arrow_nuc_reads = FALSE,
1727
    #       plot_gaussian_reads = FALSE,
1728
    #       plot_gaussian_unified_reads = FALSE,
1729
    #       plot_ellipse_nucs = FALSE,
1730
    #       plot_wp_nucs = FALSE,
1731
    #       plot_wp_nuc_model = FALSE,
1732
    #       plot_common_nucs = FALSE,
1733
    #       height = 50)
1734
  })
1735

    
1736

    
1737

    
1738

    
1739

    
1740

    
1741

    
1742

    
1743

    
1744

    
1745

    
1746

    
1747

    
1748

    
1749

    
1750

    
1751

    
1752

    
1753

    
1754

    
1755

    
1756

    
1757

    
1758

    
1759

    
1760

    
1761

    
1762

    
1763

    
1764

    
1765

    
1766

    
1767

    
1768

    
1769

    
1770

    
1771

    
1772

    
1773

    
1774

    
1775

    
1776

    
1777

    
1778

    
1779

    
1780

    
1781

    
1782

    
1783

    
1784

    
1785

    
1786

    
1787

    
1788

    
1789

    
1790

    
1791

    
1792

    
1793

    
1794

    
1795

    
1796

    
1797

    
1798

    
1799

    
1800

    
1801

    
1802

    
1803

    
1804

    
1805

    
1806

    
1807

    
1808

    
1809

    
1810

    
1811

    
1812

    
1813

    
1814

    
1815

    
1816

    
1817

    
1818
watch_samples = function(# Watching analysis of samples
1819
### This function allows to view analysis for a particuler region of the genome.
1820
replicates, ##<< replicates under the form...
1821
read_length, ##<< length of the reads
1822
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1823
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1824
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1825
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1826
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1827
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1828
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1829
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1830
change_col = TRUE, ##<< Change the color of each nucleosome.
1831
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1832
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1833
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1834
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1835
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1836
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1837
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1838
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1839
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1840
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1841
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1842
main=NULL, ##<< main title of the produced plot
1843
xlab=NULL, ##<< xlab of the produced plot
1844
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1845
config=NULL ##<< GLOBAL config variable
1846
){
1847
  returned_list = list()
1848
  # Computing global display parameters
1849
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1850
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1851
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1852
  } else {
1853
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1854
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1855
  }
1856
	base_glo = 0
1857
	nb_rank_glo = 0
1858
  for (samples in replicates) {
1859
  	nb_rank_glo = nb_rank_glo + length(samples)
1860
  }
1861
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1862
	y_min_glo = min(ylim_glo)
1863
	y_max_glo = max(ylim_glo)
1864
  delta_y_glo = y_max_glo - y_min_glo
1865
  # Plot main frame
1866
  if (is.null(xlab)) {
1867
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1868
  }
1869
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1870
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1871
  # Go
1872
	replicates_wp_nucs = list()
1873
  wp_maps = list()
1874
  fuzzy_maps = list()
1875
  for (replicate_rank in 1:length(replicates)) {
1876
		# Computing replicate parameters
1877
		nb_rank = length(samples)
1878
		base = (replicate_rank-1) * height * nb_rank
1879
		ylim = c(base, base + height * nb_rank)
1880
		y_min = min(ylim)
1881
		y_max = max(ylim)
1882
	  delta_y = y_max - y_min
1883
		samples = replicates[[replicate_rank]]
1884
		for (sample_rank in 1:length(samples)) {
1885
			# computing sample parameters
1886
			sample = samples[[sample_rank]]
1887
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1888
      if (plot_sample_id) {
1889
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1890
      }
1891

    
1892
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1893
			  x_min = sample$roi[["begin"]]
1894
			  x_max = sample$roi[["end"]]
1895
		  } else {
1896
			  x_min = - sample$roi[["begin"]]
1897
			  x_max = - sample$roi[["end"]]
1898
		  }
1899
			shift = x_min_glo - x_min
1900
	    # Plot Genome seq
1901
			if (plot_ref_genome) {
1902
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1903
		  }
1904
			# Plot reads
1905
			reads = sample$inputs
1906
			signs = sign_from_strand(reads[,3])
1907
			if (plot_arrow_raw_reads) {
1908
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1909
				col=1, length=0.15/nb_rank)
1910
			}
1911
	    if (plot_squared_reads) {
1912
        # require(plotrix)
1913
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1914
			}
1915
	    if (plot_coverage) {
1916
        if (length(reads[,1]) != 0) {
1917
          step_h = sign(x_min) * signs * reads[,4]
1918
          step_b = sign(x_min) * reads[,2] + shift
1919
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1920
          steps_x = min(step_b, step_e):max(step_b, step_e)
1921
          steps_y = rep(0, length(steps_x))
1922
          for (i in 1:length(step_h)) {
1923
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1924
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1925
          }
1926
          tmp_index = which(steps_y != 0)
1927
          steps_x = steps_x[tmp_index]
1928
          steps_y = steps_y[tmp_index]
1929
          tmp_current_level = 0
1930
          for (i in 1:length(steps_y)) {
1931
            steps_y[i] = tmp_current_level + steps_y[i]
1932
            tmp_current_level = steps_y[i]
1933
          }
1934
          steps_y = c(0, steps_y)
1935
          steps_y = steps_y * 1000000/sample$total_reads
1936
        } else {
1937
          steps_y = c(0, 0, 0)
1938
          steps_x = c(x_min, x_max)
1939
        }
1940
        # print(steps_x)
1941
        # print(steps_y)
1942
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1943
        abline(y_lev,0)
1944
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1945
			}
1946
			# Plot nucs
1947
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1948
				nucs = sample$outputs
1949
				if (length(nucs$center) > 0) {
1950
					col = 1
1951
		      for (i in 1:length(nucs$center)) {
1952
            foo<<-nucs
1953
            if (change_col) {
1954
  						col = col + 1
1955
              } else {
1956
                col = "blue"
1957
              }
1958
		        nuc = nucs[i,]
1959
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1960
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1961
						total_involved_reads = sum(involved_reads[,4])
1962
						if (plot_arrow_nuc_reads ) {
1963
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1964
							col=col, length=0.15/nb_rank)
1965
						}
1966
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1967
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1968
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1969
		  			}
1970
	          if (plot_gaussian_reads ) {
1971
							flatted_reads = flat_reads(involved_reads, nuc$width)
1972
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1973
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1974
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1975
	          }
1976
	          if (plot_gaussian_unified_reads ) {
1977
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1978
	          }
1979
	          if (plot_ellipse_nucs) {
1980
				      # require(plotrix)
1981
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1982
						}
1983
		      }
1984
		    } else {
1985
		      print("WARNING! No nucs to print.")
1986
		    }
1987
			}
1988
	  }
1989

    
1990
	  # Plot wp nucs
1991
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1992
			if (samples[[1]]$marker == "Mnase_Seq") {
1993
				if (is.null(aggregated_intra_strain_nucs)) {
1994
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1995
				} else {
1996
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1997
				}
1998
		  } else {
1999
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2000
		  }
2001
      if (plot_chain) {
2002
        tf_nucs = lapply(wp_nucs, function(nuc) {
2003
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2004
            tmp_nuc = tmp_nuc[[1]]
2005
            tmp_nuc$inputs = NULL
2006
            tmp_nuc$original_reads = NULL
2007
            tmp_nuc$wp = nuc$wp
2008
            # print(tmp_nuc)
2009
            return(tmp_nuc)
2010
          })
2011
          return(do.call(rbind, bar))
2012
        })
2013
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2014
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2015
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2016
        tmp_y_prev = tmp_y[-length(tmp_y)]
2017
        tmp_y_next = tmp_y[-1]
2018
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2019
        tmp_track = unlist(tf_nucs$track)
2020
        tmp_track_prev = tmp_track[-length(tmp_track)]
2021
        tmp_track_next = tmp_track[-1]
2022
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2023
        if (is.null(config$TRACK_LOD_OFFSET)) {
2024
          config$TRACK_LOD_OFFSET = 0
2025
        }
2026
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
2027
        tmp_x_prev = tmp_x[-length(tmp_x)]
2028
        tmp_x_next = tmp_x[-1]
2029
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2030
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2031
        tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
2032
        new_tmp_x = c()
2033
        new_tmp_y = c()
2034
        index_odd = 1:length(tmp_x) * 2 - 1
2035
        index_even = (1:(length(tmp_x) - 1)) * 2
2036
        new_tmp_x[index_odd] = tmp_x
2037
        new_tmp_y[index_odd] = tmp_y
2038
        new_tmp_x[index_even] = tmp_x_inter
2039
        new_tmp_y[index_even] = tmp_y_inter
2040
        lines(new_tmp_x , new_tmp_y, lw=2)
2041
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2042
        points(tmp_x, tmp_y, cex=4, lw=2)
2043
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2044
        if (is.null(config$LEGEND_LOD_POS)) {
2045
          pos = 2
2046
        } else {
2047
          pos = config$LEGEND_LOD_POS
2048
        }
2049
        text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, cex=1.5, pos=pos) 
2050
      }
2051

    
2052
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2053
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2054
        strain = samples[[1]]$strain
2055
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
2056
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2057

    
2058
        if (plot_fuzzy_nucs) {
2059
          fuzzy_map = fuzzy_maps[[strain]]
2060
          if (!is.null(fuzzy_map)) {
2061
            if (nrow(fuzzy_map) > 0) {
2062
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2063
            }
2064
          }
2065
        } 
2066

    
2067
        if (plot_wp_nucs) {
2068
    			for (wp_nuc in wp_nucs) {
2069
    				if (wp_nuc$wp){
2070
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2071
    					if (plot_wp_nuc_model) {
2072
      					all_original_reads = c()
2073
      					for(initial_nuc in wp_nuc$nucs) {
2074
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2075
      					}
2076
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2077
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2078
    				  }
2079
  				  }
2080
  				}
2081
  			}
2082
      }
2083
		}
2084
	}
2085

    
2086
	if (plot_common_nucs) {
2087
    if (is.null(aligned_inter_strain_nucs)) {
2088
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2089
      if (!is.null(aligned_inter_strain_nucs)) {
2090
        aligned_inter_strain_nucs$cur_index = "foo" 
2091
      }
2092
    }
2093

    
2094
    #Plot common wp nucs
2095
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2096
    for (replicate_rank in 1:length(replicates)) {
2097
      nb_rank[[replicate_rank]] = length(samples)
2098
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2099
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2100
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2101
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2102
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2103
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2104

    
2105
      samples = replicates[[replicate_rank]]
2106
      for (sample_rank in 1:length(samples)) {
2107
        sample = samples[[sample_rank]]
2108
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2109
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2110
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2111
          x_max[[replicate_rank]] = sample$roi[["end"]]
2112
        } else {
2113
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2114
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2115
        }
2116
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2117
      }
2118
    }
2119
    print(aligned_inter_strain_nucs)
2120
    if (!is.null(aligned_inter_strain_nucs)) {
2121
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2122
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2123
        tmp_xs = tmp_ys = c()
2124
        for (replicate_rank in 1:length(replicates)) {
2125
          samples = replicates[[replicate_rank]]
2126
          strain = samples[[1]]$strain
2127
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2128
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2129
        }
2130
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19)
2131
      }
2132
    }
2133
    
2134
    if (plot_common_unrs) {
2135
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2136
      roi = as.list(samples[[1]]$roi)
2137
      cur_index = "foo"
2138
      common_nuc_results = list()
2139
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2140
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2141
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lw=3, col=adjustcolor(4, alpha.f = 0.05))        
2142
    }
2143

    
2144
	}
2145
  return(returned_list)
2146
}
2147

    
2148

    
2149

    
2150
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2151
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2152
wp_map, ##<< Well positionned nucleosomes map.
2153
roi, ##<< The region of interest.
2154
strain, ##<< The strain we want to extracvt the fuzzy map.
2155
config=NULL ##<< GLOBAL config variable.
2156
) {
2157
  fuzzy_map = wp_map[wp_map$wp==0, ]
2158
  if (nrow(fuzzy_map) > 0) {
2159
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2160
    if (!is.null(fuzzy_map)) {
2161
      fuzzy_map = union_regions(fuzzy_map)
2162
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2163
    }
2164
  }
2165
  return(fuzzy_map)
2166
}
2167

    
2168

    
2169

    
2170

    
2171

    
2172
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2173
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2174
combi, ##<< The strain combination to consider.
2175
roi, ##<< The region of interest.
2176
cur_index, ##<< The region of interest index.
2177
wp_maps, ##<< Well positionned nucleosomes maps.
2178
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2179
common_nuc_results, ##<< Common wp nuc maps
2180
config=NULL ##<< GLOBAL config variable
2181
) {
2182
  # print(cur_index)
2183

    
2184
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2185
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2186
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2187

    
2188
  # print(paste("Dealing with unr from", combi[1]))
2189
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2190
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2191
  tmp_wp = wp_maps[[combi[1]]]
2192
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2193
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2194
  # Let's go!
2195
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2196
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2197
      return(NULL)
2198
    } else {
2199
      return (index_nuc)
2200
    }
2201
  }))
2202
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2203
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2204
  if (length(tmp_unr) != 0) {
2205
    tmp_unr = union_regions(tmp_unr)
2206
  }
2207
  tmp_unr_nucs_1 = tmp_unr
2208
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2209
  agg_unr_1 = tmp_unr_nucs_1
2210

    
2211
  # print(paste("Dealing with unr from ", combi[2]))
2212
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2213
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2214
  tmp_wp = wp_maps[[combi[2]]]
2215
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2216
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2217
  # Let's go!
2218
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2219
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2220
      return(NULL)
2221
    } else {
2222
      return (index_nuc)
2223
    }
2224
  }))
2225
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2226
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2227
  if (length(tmp_unr) != 0) {
2228
    tmp_unr = union_regions(tmp_unr)
2229
  }
2230
  tmp_unr_nucs_2 = tmp_unr
2231
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2232
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2233
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2234
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2235

    
2236
  # print("Dealing with unr from both...")
2237
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2238

    
2239
  # print(paste("Dealing with wp from", combi[1]))
2240
  tmp_wp = wp_maps[[combi[1]]]
2241
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2242
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2243
  # Let's go!
2244
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2245
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2246
      return (index_nuc)
2247
    } else {
2248
      return(NULL)
2249
    }
2250
  }))
2251
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2252
  
2253
  # print(paste("Dealing with wp from", combi[2]))
2254
  tmp_wp = wp_maps[[combi[2]]]
2255
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2256
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2257
  # Let's go!
2258
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2259
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2260
      return (index_nuc)
2261
    } else {
2262
      return(NULL)
2263
    }
2264
  }))
2265
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2266
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2267
  if (nrow(wp_nucs_2) == 0) {
2268
    tr_wp_nucs_2 = wp_nucs_2
2269
  } else {
2270
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2271
  }
2272

    
2273
  # print("Dealing with wp from both...")
2274
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2275

    
2276
  # print("Dealing with unr and wp...")
2277
  non_inter_unr = substract_region(all_unr, all_wp)
2278
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2279
  if (is.null(non_inter_unr)) { return(NULL) }
2280
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2281
  # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,]
2282

    
2283
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2284
  return (non_inter_unr)
2285
}
2286

    
2287

    
2288