
1
FDR = structure(function#  False Discovery Rate
2
### From a vector x of independent p-values, extract the p-value cutoff corresponding to the specified FDR. See the Benjamini & Hochberg (1995) paper.
3
##author<< Gael Yvert,
4
(
5
x, ##<< A vector x of independent p-values.
6
FDR ##<< The specified FDR.
7
) {
8
  x <- sort(na.omit(x))
9
  N = length(x)
10
  i = 1;
11
  while(i <= N && N*x[i]/i < FDR) i = i + 1; # search for the highest i where Nrandom / Nobserved (= N*x[i]/i) < FDR
12
  if (i == 1)
13
    return (NA)
14
  else
15
    return( x[i-1] )
16
### Returns the corresponding cutoff.
17
}, ex=function(){
18
  print("example")
19
})
20

    
21

    
22

    
23
llr_score_nvecs = structure(function # Likelihood ratio
24
### Compute the log likelihood ratio of two or more sets of values.
25
(
26
  xs ##<< list of vectors.
27
) {
28
  l = length(xs)
29
  if (l < 1) {
30
    return(NA)
31
  }
32
  if (l == 1) {
33
    return(1)
34
  }
35
  sumllX = 0
36
  for (i in 1:l) {
37
    x = xs[[i]]
38
  	if (length(x) <= 1) {
39
  		return(NA)
40
  	}
41
    meanX = mean(x)
42
    sdX = sd(x)
43
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
44
    sumllX = sumllX + llX
45
  }
46
  meanXglo = mean(unlist(xs))
47
  sdXglo = sd(unlist(xs))
48
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
49
  ratio = sumllX - llXYZ
50
  return(ratio)
51
### Returns the log likelihood ratio.
52
}, ex=function(){
53
  # LLR score for 2 sets of values
54
  mean1=5; sd1=2; card1 = 200
55
  mean2=6; sd2=3; card2 = 250
56
  x1 = rnorm(card1, mean1, sd1)
57
  x2 = rnorm(card2, mean2, sd2)
58
  min = floor(min(c(x1,x2)))
59
  max = ceiling(max(c(x1,x2)))
60
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
61
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
62
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
63
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*(card1+card2),col=4)
64
  llr_score_nvecs(list(x1,x2))
65
 })
66

    
67
dfadd = structure(function# Add a list to a dataframe.
68
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
69
	(df, ##<<  A dataframe
70
		l ##<<  A list
71
	) {
72
  if (is.null(df)) {
73
    df = data.frame(l,stringsAsFactors=FALSE)
74
  } else {
75
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
76
  }
77
  return(df)
78
### Return the dataframe \emph{df}.
79
}, ex=function(){
80
		## Here dataframe is NULL
81
		df = NULL
82
		print(df)
83

    
84
		# Initialize df
85
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
86
		print(df)
87

    
88
		# Adding elements to df
89
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
90
		print(df)
91
})
92

    
93

    
94
sign_from_strand = function(
95
### Get the sign of a strand ("F" or "R").
96
strands) {
97
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
98
### If the strand is forward ("F"), returns 1; otherwise returns -1.
99
}
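# Minimal usage sketch (hypothetical values): strands are coded "F"/"R" as in the TF input files.
# sign_from_strand(c("F", "R", "F"))  # expected: 1 -1 1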
100

    
101
flat_reads = function(
102
### Extract read coordinates from TemplateFilter input reads.
103
reads, ##<< TemplateFilter input reads
104
nuc_width ##<< Width used to shift F and R reads.
105
) {
106
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
107
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
108
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
109
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
110
### Returns a list of F reads, R reads, and the joined/shifted F and R reads.
111
}
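# Minimal usage sketch with a hypothetical TF input table (columns V1=chr, V2=position,
# V3=strand, V4=count); wrapped in if (FALSE) so it is not run when the file is sourced.
if (FALSE) {
  reads = data.frame(V1 = c(1, 1), V2 = c(1000, 1180), V3 = c("F", "R"), V4 = c(2, 3),
                     stringsAsFactors = FALSE)
  # [[1]] F positions repeated by count, [[2]] R positions, [[3]] both shifted by +/- 160/2 bp
  flat_reads(reads, 160)
}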
112

    
113

    
114

    
115
filter_tf_outputs = function(# Filter TemplateFilter outputs
116
### This function filters TemplateFilter outputs according to the properties of the observed genome area, as well as correlation and overlap thresholds.
117
tf_outputs, ##<< TemplateFilter outputs.
118
chr, ##<< Chromosome observed, here chr is an integer.
119
x_min, ##<< Coordinate of the first bp observed.
120
x_max, ##<< Coordinate of the last bp observed.
121
nuc_width = 160, ##<< Nucleosome width.
122
ol_bp = 59, ##<< Overlap threshold (in bp).
123
corr_thres = 0.5 ##<< Correlation threshold.
124
) {
125
  if (x_min < 0) {
126
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
127
	} else {
128
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
129
  }
130
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
131
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
132
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
133
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
134
  i = 1
135
  while (i <= length(tf_outputs[,1])) {
136
    lb = tf_outputs[i,]$lower_bound
137
    ub = tf_outputs[i,]$upper_bound
138
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
139
    i = i+1
140
  }
141
  return(tf_outputs)
142
### Returns the filtered TemplateFilter outputs.
143
}
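# Minimal usage sketch with a hypothetical one-nucleosome TF output table (column names as
# used above); wrapped in if (FALSE) so it is not run when the file is sourced.
if (FALSE) {
  tf_out = data.frame(chr = "chr1", center = 1150, width = 150, correlation.score = 0.9,
                      stringsAsFactors = FALSE)
  # keeps the nucleosome (inside [1000, 1300], correlation >= 0.5) and adds lower_bound / upper_bound
  filter_tf_outputs(tf_out, chr = 1, x_min = 1000, x_max = 1300)
}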
144

    
145

    
146

    
147
filter_tf_inputs = function(# Filter TemplateFilter inputs
148
### This function filters TemplateFilter inputs according to the properties of the observed genome area. It takes into account reads at the boundaries of this area and their strand.
149
inputs, ##<< TF inputs to be filtered.
150
chr, ##<< Chromosome observed, here chr is an integer.
151
x_min, ##<< Coordinate of the first bp observed.
152
x_max, ##<< Coordinate of the last bp observed.
153
nuc_width = 160, ##<< Nucleosome width.
154
only_f = FALSE, ##<< Filter only F reads.
155
only_r = FALSE, ##<< Filter only R reads.
156
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
157
) {
158
	if (only_f) {
159
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
160
	} else if (only_r) {
161
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
162
	} else {
163
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
164
	}
165
  if (!filter_for_coverage) {
166
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
167
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]    
168
  }
169
	return(inputs)
170
### Returns the filtered inputs.
171
}
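# Minimal usage sketch with a hypothetical TF input table (V1=chr, V2=position, V3=strand,
# V4=count); wrapped in if (FALSE) so it is not run when the file is sourced.
if (FALSE) {
  inputs = data.frame(V1 = c(1, 1, 1), V2 = c(950, 1100, 1400), V3 = c("F", "R", "F"),
                      V4 = c(1, 2, 1), stringsAsFactors = FALSE)
  # keeps the reads whose strand-corrected coordinate falls inside [1000, 1300]
  filter_tf_inputs(inputs, chr = 1, x_min = 1000, x_max = 1300, nuc_width = 160)
}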
172

    
173

    
174

    
175
# filter_tf_inputs = function(# Filter TemplateFilter inputs
176
# ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
177
# inputs, ##<< TF inputs to be filtered.
178
# chr, ##<< Chromosome observed, here chr is an integer.
179
# x_min, ##<< Coordinate of the first bp observed.
180
# x_max, ##<< Coordinate of the last bp observed.
181
# nuc_width = 160, ##<< Nucleosome width.
182
# only_f = FALSE, ##<< Filter only F reads.
183
# only_r = FALSE, ##<< Filter only R reads.
184
# filter_for_coverage = FALSE, ##<< Does it filter for plot coverage?
185
# USE_DPLYR = FALSE ##<< Use dplyr lib to filter reads.
186
# ) {
187
#   n = names(inputs)
188
#
189
#   if (!USE_DPLYR) {
190
#     if (only_f) {
191
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
192
#     } else if (only_r) {
193
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
194
#     } else {
195
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
196
#     }
197
#   } else {
198
#     names(inputs) = c("chr", "pos", "str", "lev")
199
#     if (only_f) {
200
#       inputs_out = filter(inputs, chr == chr,  pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width)
201
#     } else if (only_r) {
202
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width)
203
#     } else {
204
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width)
205
#     }
206
#       # if (!filter_for_coverage) {
207
#       #   inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
208
#       #   inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max)
209
#       #   inputs$corrected_inputs_coords = NULL
210
#       # }
211
#   }
212
#
213
#   if (!filter_for_coverage) {
214
#     corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
215
#     inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
216
#   }
217
#
218
#   names(inputs_out) = n
219
#   return(inputs_out)
220
# ### Returns filtred inputs.
221
# }
222

    
223

    
224

    
225
get_comp_strand = function(
226
### Compute the complementary strand.
227
strand ##<< The original strand.
228
) {
229
	apply(t(strand),2, function(n){
230
	  if (n=="a") {return("t")}
231
		if (n=="t") {return("a")}
232
		if (n=="c") {return("g")}
233
		if (n=="g") {return("c")}
234
	})
235
### Returns the complementary strand.
236
}
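# Minimal usage sketch: bases are expected in lower case, as in the FASTA genomes used here.
# get_comp_strand(c("a", "t", "g", "c"))  # expected: "t" "a" "c" "g"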
237

    
238

    
239
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated samples' nucleosomes.
240
### This function aggregates nucleosomes from replicated samples. It uses the TemplateFilter output of each sample as a replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is built across all replicates. Adjacent nucleosomes of the chain are compared two by two. The comparison is based on a log likelihood ratio (LLR1). Depending on the LLR1 value, nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster owns an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
241
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
242
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating.
243
coord_max=20000000 ##<< A sentinel value, larger than any possible nucleosome lower-bound coordinate.
244
){
245
	end_of_tracks = function(tracks) {
246
		if (length(tracks) == 0) {
247
			return(TRUE)
248
		}
249
	  for (lower_bound in tracks) {
250
			if(!is.na(lower_bound)) {
251
	      if (lower_bound < coord_max) {
252
	        return(FALSE)
253
	      }
254
	  	}
255
	  }
256
	  return(TRUE)
257
	}
258
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
259
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
260
			new_cluster$wp = TRUE
261
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
262
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
263
		  	clusters[[length(clusters) + 1]] = new_cluster
264
				# print(new_cluster)
265
		  }
266
		} else {
267
			new_cluster$wp = FALSE
268
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
269
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
270
			  clusters[[length(clusters) + 1]] = new_cluster
271
			}
272
		}
273
		return(clusters)
274
	}
275
	strain = samples[[1]]$strain
276
	llr_scores = c()
277
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
278
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
279
  # compute clusters
280
  clusters = list()
281
  cluster_contents = list()
282
  # Init reader
283
  indexes = c()
284
  track_readers = c()
285
  current_nuc = NULL
286
	llr_score = llr_thres + 1
287
  # Read nucs from TF outputs
288
  tf_outs = list()
289
	i = 1
290
  for (sample in samples) {
291
		tf_outs[[i]] = sample$outputs
292
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
293
    indexes[i] = 1
294
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
295
      track_readers[i] = coord_max
296
	  } else {
297
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
298
		}
299
		i = i+1
300
  }
301
  nb_tracks = length(tf_outs)
302
	# print(track_readers)
303
  new_cluster = NULL
304
  nb_nucs_in_cluster = 0
305
  nuc_from_track = c()
306
  for (i in 1:nb_tracks){
307
    nuc_from_track[i] = FALSE
308
  }
309
  # Start clustering
310
  while (!end_of_tracks(track_readers)) {
311
    new_center = min(track_readers)
312
		current_track = which(track_readers == new_center)[1]
313
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
314
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
315
		new_nuc$inputs = samples[[current_track]]$inputs
316
		new_nuc$chr = samples[[current_track]]$roi$chr
317
		new_nuc$track = current_track
318

    
319
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
320
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
321
		new_nuc$original_reads = flatted_reads[[3]]
322

    
323
    new_upper_bound = new_nuc$upper_bound
324

    
325
    if (!is.null(current_nuc)) {
326
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
327
			llr_scores = c(llr_scores,llr_score)
328
		}
329
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
330
		if (is.na(llr_score)) {
331
			llr_score = llr_thres + 1
332
		}
333
		# Store llr_score
334
		new_nuc$llr_score = llr_score
335
	  if (llr_score < llr_thres) {
336
      # aggregate to current cluster
337
      #   update bound
338
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
339
        new_cluster$upper_bound = new_nuc$upper_bound
340
      }
341
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
342
        new_cluster$lower_bound = new_nuc$lower_bound
343
      }
344
      #   add nucleosome to current cluster
345
      nuc_from_track[current_track] = TRUE
346
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
347
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
348
    } else {
349
			if (!is.null(new_cluster)) {
350
        # store old cluster
351
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
352
			}
353
      # Reinit current cluster composition stuff
354
      nb_nucs_in_cluster = 0
355
      nuc_from_track = c()
356
      for (i in 1:nb_tracks){
357
        nuc_from_track[i] = FALSE
358
      }
359
      # create new cluster
360
      new_cluster = list(lower_bound=new_nuc$lower_bound, upper_bound=new_nuc$upper_bound, chr=new_nuc$chr, strain_ref=strain, nucs=list())
361
      # update upper bound
362
      current_upper_bound = new_upper_bound
363
      # add nucleosome to current cluster
364
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
365
      nuc_from_track[current_track] = TRUE
366
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
367

    
368
		}
369

    
370
		current_nuc = new_nuc
371

    
372
    # update indexes
373
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
374
      indexes[current_track] = indexes[current_track] + 1
375
      # update track
376
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
377
    } else {
378
      # update track
379
      track_readers[current_track] = coord_max
380
    }
381
  }
382
  # store last cluster
383
  if (!is.null(new_cluster)) {
384
    # store old cluster
385
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
386
  }
387
	return(list(clusters, llr_scores))
388
### Returns a list of clusterized nucleosomes, and all computed llr scores.
389
}, ex=function(){
390
	# Dealing with a region of interest
391
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
392
	samples = list()
393
	for (i in 1:3) {
394
		# Create TF output
395
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
396
		outputs = dfadd(NULL,tf_nuc)
397
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
398
		# Generate corresponding reads
399
		nb_reads = round(runif(1,170,230))
400
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
401
		u_reads = sort(unique(reads))
402
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
403
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
404
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
405
		u_reads = u_reads + shifts
406
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
407
		                         "V2" = u_reads,
408
														 "V3" = strands,
409
														 "V4" = counts), stringsAsFactors=FALSE)
410
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
411
	}
412
	print(aggregate_intra_strain_nucs(samples))
413
})
414

    
415
flat_aggregated_intra_strain_nucs = function(# Flatten the aggregate_intra_strain_nucs function output
416
### This function builds a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function.
417
partial_strain_maps, ##<< The clusters output by the aggregate_intra_strain_nucs function.
418
cur_index, ##<< the index of the roi involved
419
nb_tracks=3 ##<< the number of replicates
420
) {
421
	if  (length(partial_strain_maps) == 0 ){
422
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
423
    tmp_strain_maps = list()
424
	} else {
425
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
426
			tmp_nuc = partial_strain_maps[[i]]
427
			tmp_nuc_as_list = list()
428
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
429
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
430
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
431
			tmp_nuc_as_list[["cur_index"]] = cur_index
432
			tmp_nuc_as_list[["index_nuc"]] = i
433
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
434
			all_original_reads = c()
435
			for (j in 1:length(tmp_nuc$nucs)) {
436
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
437
			}
438
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
439
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
440
			if (tmp_nuc$wp) {
441
        for (i in 1:(nb_tracks-1)) {
442
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5)          
443
        }
444
			} else {
445
        for (i in 1:(nb_tracks-1)) {
446
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA
447
        }
448
			}
449
      return(tmp_nuc_as_list)
450
    })
451
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
452
	}
453
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
454
### Returns a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function.
455
}
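# Minimal usage sketch (hypothetical), reusing the 'samples' object built in the
# aggregate_intra_strain_nucs example above; wrapped in if (FALSE) so it is not run on source.
if (FALSE) {
  agg = aggregate_intra_strain_nucs(samples)  # agg[[1]] = clusters, agg[[2]] = llr scores
  flat_aggregated_intra_strain_nucs(agg[[1]], cur_index = 1, nb_tracks = 3)
}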
456

    
457
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
458
### This function aligns nucleosomes between two strains for a given genome region.
459
replicates, ##<< Set of replicates, ideally 3 per strain.
460
wp_nucs_strain_ref1=NULL, ##<< List of aggregated nucleosomes for strain 1. If NULL, this list will be computed.
461
wp_nucs_strain_ref2=NULL, ##<< List of aggregated nucleosomes for strain 2. If NULL, this list will be computed.
462
corr_thres=0.5, ##<< Correlation threshold.
463
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
464
config=NULL, ##<< GLOBAL config variable
465
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
466
) {
467
	if (length(replicates) < 2) {
468
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
469
	} else if (length(replicates) > 2) {
470
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
471
	}
472
	common_nuc = NULL
473
	llr_scores = c()
474
	chr = replicates[[1]][[1]]$roi$chr
475
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
476
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
477
	strain_ref1 = replicates[[1]][[1]]$strain
478
	strain_ref2 = replicates[[2]][[1]]$strain
479
	big_cur = replicates[[1]][[1]]$roi
480
  orig_big_cur = replicates[[1]][[1]]$orig_roi
481
	if(big_cur$end - big_cur$begin < 0) {
482
		tmp_begin = big_cur$begin
483
		big_cur$begin =  big_cur$end
484
		big_cur$end =  tmp_begin
485
	}
486
	# GO!
487
	if (is.null(wp_nucs_strain_ref1)) {
488
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
489
	}
490
	if (is.null(wp_nucs_strain_ref2)) {
491
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
492
  }
493
	lws = c()
494
	ups = c()
495
	for (na in wp_nucs_strain_ref2) {
496
		lws = c(lws, na$lower_bound)
497
		ups = c(ups, na$upper_bound)
498
	}
499

    
500
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
501
	roi_strain_ref1 = NULL
502
	roi_strain_ref2 = NULL
503
	if (length(wp_nucs_strain_ref1) > 0) {
504
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
505
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
506
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
507
			# Filtering on Well Positioned
508
			if (nuc_strain_ref1$wp) {
509
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
510
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
511
        if (!is.null(roi_strain_ref2)){
512
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
513
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
514
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
515
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
516
					for (index_nuc_strain_ref2 in matching_nas) {
517
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
518
						# Filtering on Well Positioned
519
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
520
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
521
                nuc_strain_ref2$wp) {
522
							# Filtering on correlation Score and collecting reads
523
							SKIP = FALSE
524
							# TODO: This for loop could be done before working on strain_ref2, couldn't it?
525
							reads_strain_ref1 = c()
526
							for (nuc in nuc_strain_ref1$nucs){
527
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
528
								if (nuc$corr < corr_thres) {
529
									SKIP = TRUE
530
								}
531
							}
532
							reads_strain_ref2 = c()
533
							for (nuc in nuc_strain_ref2$nucs){
534
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
535
								if (nuc$corr < corr_thres) {
536
									SKIP = TRUE
537
								}
538
							}
539
							# Filtering on correlation Score
540
							if (!SKIP) {
541
								# translation of reads into strain 2 coords
542
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
543
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
544
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
545
								llr_scores = c(llr_scores, llr_score)
546
								# Filtering on LLR Score
547
                if (llr_score < llr_thres) {
548
									tmp_nuc = list()
549
									# strain_ref1
550
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
551
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
552
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
553
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
554
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
555
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
556
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
557
									# strain_ref2
558
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
559
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
560
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
561
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
562
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
563
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
564
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
565
									# common
566
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
567
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
568
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
569
									common_nuc = dfadd(common_nuc, tmp_nuc)
570
                }
571
							}
572
						}
573
					}
574
				} else {
575
		      print("WARNING! No roi for strain ref 2.")
576
			  }
577
		  }
578
		}
579

    
580
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
581
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
582
      to_remove_list = c()
583
      for (i in 1:length(index_redundant)) {
584
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
585
          to_remove = index_redundant[i]
586
        }   else {
587
          to_remove = index_redundant[i] + 1
588
        }
589
        to_remove_list = c(to_remove_list, to_remove)
590
      }
591
      common_nuc = common_nuc[-to_remove_list,]
592
    }
593

    
594
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
595
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
596
      to_remove_list = c()
597
      for (i in 1:length(index_redundant)) {
598
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
599
          to_remove = index_redundant[i]
600
        }   else {
601
          to_remove = index_redundant[i] + 1
602
        }
603
        to_remove_list = c(to_remove_list, to_remove)
604
      }
605
      common_nuc = common_nuc[-to_remove_list,]
606
    }
607

    
608
		return(list(common_nuc, llr_scores))
609
	} else {
610
		print("WARNING, no nucs for strain_ref1.")
611
		return(NULL)
612
	}
613
### Returns a list of clusterized nucleosomes, and all computed llr scores.
614
}, ex=function(){
615

    
616
    # Define new translate_cur function...
617
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
618
      return(roi)
619
    }
620
    # Bind it by uncommenting the following lines.
621
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
622
    unlockBinding("translate_cur", getNamespace("nucleominer"))
623
    assign("translate_cur", translate_cur, "package:nucleominer")
624
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
625
    lockBinding("translate_cur", getNamespace("nucleominer"))
626
    lockBinding("translate_cur", as.environment("package:nucleominer"))
627

    
628
	# Dealing with a region of interest
629
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
630
	roi2 = translate_cur(roi, roi$strain_ref1)
631
	replicates = list()
632
	for (j in 1:2) {
633
		samples = list()
634
		for (i in 1:3) {
635
			# Create TF output
636
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
637
			outputs = dfadd(NULL,tf_nuc)
638
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
639
			# Generate corresponding reads
640
			nb_reads = round(runif(1,170,230))
641
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
642
			u_reads = sort(unique(reads))
643
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
644
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
645
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
646
			u_reads = u_reads + shifts
647
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
648
			                         "V2" = u_reads,
649
															 "V3" = strands,
650
															 "V4" = counts), stringsAsFactors=FALSE)
651
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
652
		}
653
		replicates[[length(replicates) + 1]] = samples
654
	}
655
	print(align_inter_strain_nucs(replicates))
656
})
657

    
658

    
659

    
660

    
661

    
662

    
663

    
664

    
665

    
666

    
667

    
668

    
669

    
670

    
671

    
672

    
673

    
674

    
675

    
676

    
677

    
678

    
679

    
680

    
681

    
682

    
683
fetch_mnase_replicates = function(# Prefetch data
684
### Fetch and filter inputs and outputs for a region of interest. Organize them per replicate.
685
strain, ##<< The strain for which we want MNase replicates.
686
roi, ##<< Region of interest.
687
all_samples, ##<< Global list of samples.
688
config=NULL, ##<< GLOBAL config variable
689
only_fetch=FALSE, ##<< If TRUE, only fetch without filtering. It is used to load sample files into memory before forking.
690
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
691
get_ouputs=TRUE ##<< If TRUE, also get the corresponding TF output files.
692
) {
693
	samples=list()
694
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
695
	for (i in samples_ids) {
696
		sample = as.list(all_samples[all_samples$id==i,])
697
    sample$orig_roi = roi
698
    sample$roi = translate_cur(roi, sample$strain, config = config)
699
		if (get_genome) {
700
			# Get Genome
701
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
702
		}
703
		# Get inputs
704
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
705
		sample$total_reads = sum(sample$inputs[,4])
706
		if (!only_fetch) {
707
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
708
	  }
709
	  # Get TF outputs for Mnase_Seq samples
710
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
711
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
712
			if (!only_fetch) {
713
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
714
  		}
715
		}
716
		samples[[length(samples) + 1]] = sample
717
	}
718
  return(samples)
719
}
720

    
721
substract_region = function(# Subtract from a list of regions another list of regions that intersects it.
722
### This function embeds a recursive part, used when a subtracted region splits an original region in two.
723
region1, ##<< Original regions.
724
region2 ##<< Regions to subtract.
725
) {
726
  rec_substract_region = function(region1, region2) {
727
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
728
    cur_fuzzy = region1[i,]
729
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
730
    if (length(inter_wp[,1]) > 0) {
731
      ret = c()
732
      for (j in 1:length(inter_wp[,1])) {
733
        cur_wp = inter_wp[j,]
734
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
735
          # remove cur_fuzzy
736
          ret = c()
737
          break
738
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
739
          # crop fuzzy
740
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
741
          ret = cur_fuzzy
742
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
743
          # crop fuzzy
744
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
745
          ret = cur_fuzzy
746
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
747
          # split fuzzy
748
          tmp_ret_fuzzy_1 = cur_fuzzy
749
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
750
          tmp_ret_fuzzy_2 = cur_fuzzy
751
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
752
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
753
          # print(ret)
754
          # ret = cur_fuzzy
755
          break
756
        } else {
757
          stop("WARNING NO ADAPTED CASE!")
758
        }
759
      }
760
      return(ret)
761
    } else {
762
      return(cur_fuzzy)
763
    }
764
  })
765
  }
766
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
767
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
768
  tmp_ulist = unlist(non_inter_fuzzy)
769
  tmp_names = names(tmp_ulist)[1:4]
770
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
771
  names(non_inter_fuzzy) = tmp_names
772
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
773
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
774
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
775
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
776
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
777
  return(non_inter_fuzzy)
778
}
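# Minimal usage sketch with hypothetical one-row region tables (the first four columns must be
# chr, lower_bound, upper_bound, cur_index, as assumed by the [,1:4] sub-setting above);
# wrapped in if (FALSE) so it is not run when the file is sourced.
if (FALSE) {
  r1 = data.frame(chr = 1, lower_bound = 100, upper_bound = 500, cur_index = 1)
  r2 = data.frame(chr = 1, lower_bound = 50,  upper_bound = 200, cur_index = 1)
  substract_region(r1, r2)  # the part of r1 not covered by r2, here [201, 500]
}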
779

    
780
union_regions = function(# Aggregate regions that intersect each other.
781
### This function relies on sorting regions by lower bound to detect intersections: each lower bound is compared with the upper bound of the previous item. It embeds a while loop that stops when the regions list becomes stable.
782
regions ##<< The regions to be aggregated.
783
) {
784
  if (is.null(regions)) {return(regions)}
785
  if (nrow(regions) == 0) {return(regions)}
786
  old_length = length(regions[,1])
787
  new_length = 0
788

    
789
  while (old_length != new_length) {
790
    regions = regions[order(regions$lower_bound), ]
791
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
792
    vec_end_1 = which(regions$stop)
793
    if (length(vec_end_1) == 0) {
794
      vec_end_1 = c(length(regions$stop))
795
    }
796
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
797
      vec_end_1 = c(vec_end_1, length(regions$stop))
798
    }
799
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
800
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
801
      chr = regions$chr[vec_beg_1[i]]
802
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
803
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
804
      cur_index = regions$cur_index[vec_beg_1[i]]
805
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
806
      })
807
    union = collapse_regions(union)
808
    old_length = length(regions[,1])
809
    new_length = length(union[,1])
810
    regions = union
811
  }
812
  return(union)
813
}
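# Minimal usage sketch with a hypothetical region table (columns chr, lower_bound, upper_bound,
# cur_index, as expected by collapse_regions); wrapped in if (FALSE) so it is not run on source.
if (FALSE) {
  regions = data.frame(chr = c(1, 1, 1), lower_bound = c(100, 150, 400),
                       upper_bound = c(200, 300, 500), cur_index = c(1, 1, 1))
  union_regions(regions)  # the two overlapping regions are merged into [100, 300]; [400, 500] stays
}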
814

    
815
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
816
# ### It is based on common wp nucs index on nucs and region.
817
# strain_maps, ##<< Nuc maps.
818
# cur_index, ##<< The region of interest index.
819
# tmp_common_nucs, ##<< the list of wp nucs.
820
# strain##<< The strain to consider.
821
# ){
822
#   fuzzy_nucs = strain_maps[[strain]]
823
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
824
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
825
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
826
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
827
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
828
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
829
#   return(fuzzy_nucs)
830
# }
831

    
832
translate_regions = function(# Translate a list of regions from a strain ref to another.
833
### This function is an elaborated call to translate_cur.
834
regions, ##<< Regions to be translated.
835
combi, ##<< Combination of strains.
836
cur_index, ##<< The region of interest index.
837
config=NULL, ##<< GLOBAL config variable
838
roi ##<< The region of interest.
839
) {
840
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
841
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
842
    big_cur =  roi
843
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
844
    if (is.null(trs_tmp_regions_ref2)) {
845
      return(NULL)
846
    } else {
847
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
848
    }
849
  })
850

    
851
  return(collapse_regions(tr_regions))
852
}
853

    
854
collapse_regions = function(# Reformat an "apply-manipulated" list of regions
855
### Utility to reformat an "apply-manipulated" list of regions.
856
regions ##<< A list of regions.
857
) {
858
  if (is.null(regions)) {
859
    return(NULL)
860
  } else {
861
    regions = do.call(rbind, regions)
862
    regions$chr = as.character(regions$chr)
863
    regions$chr = as.numeric(regions$chr)
864
    regions$lower_bound = as.numeric(regions$lower_bound)
865
    regions$upper_bound = as.numeric(regions$upper_bound)
866
    regions = regions[order(regions$lower_bound),]
867
    return(regions)
868
  }
869
}
870

    
871

    
872
crop_fuzzy = function(# Crop region bounds according to the region of interest bounds
873
### This function is no longer necessary since the "big_cur" bug was removed from the translate_cur function.
874
tmp_fuzzy_nucs, ##<< The regions to be cropped.
875
roi, ##<< The region of interest.
876
strain, ##<< The strain to consider.
877
config=NULL ##<< GLOBAL config variable
878
) {
879
  tr_roi = translate_cur(roi, strain, config = config)
880
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
881
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
882
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
883
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
884
  }
885
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
886
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
887
  }
888
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
889
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
890
  }
891
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
892
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
893
  }
894
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
895
  return(tmp_fuzzy_nucs)
896
}
897

    
898
get_all_reads = function(# Retrieve Reads
899
### Retrieve reads for a given marker, strain combination, and nucleosome form.
900
marker, ##<< The marker to consider.
901
combi, ##<< The strain combination to consider.
902
form="wp", ##<< The nucleosome form to consider.
903
config=NULL ##<< GLOBAL config variable
904
) {
905
	all_reads = NULL
906
  for (manip in c("Mnase_Seq", marker)) {
907
    if (form == "unr") {
908
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
909
  		tmp_res = read.table(file=out_filename, header=TRUE)
910
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
911
      tmp_res$form = form
912
    } else if (form == "wp") {
913
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
914
  		tmp_res = read.table(file=out_filename, header=TRUE)
915
      tmp_res$form = form
916
    } else if (form == "wpunr") {
917
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
918
  		tmp_res = read.table(file=out_filename, header=TRUE)
919
      tmp_res$form = "wp"
920
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
921
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
922
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
923
      tmp_res2$form = "unr"
924
      tmp_res = rbind(tmp_res, tmp_res2)
925
    }
926
		if (is.null(all_reads)) {
927
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
928
		}
929
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
930
		all_reads = cbind(all_reads, tmp_res)
931
  }
932
  return(all_reads)
933
}
934

    
935
get_design = function(# Build the design for DESeq
936
### This function builds the design according to sample properties.
937
marker, ##<< The marker to consider.
938
combi, ##<< The strain combination to consider.
939
all_samples ##<< Global list of samples.
940
) {
941
  off1 = 0
942
  off2 = 0
943
	manips = c("Mnase_Seq", marker)
944
	design_rownames = c()
945
	design_manip = c()
946
	design_strain = c()
947
  off2index = function(off) {
948
  	switch(toString(off),
949
  		"1"=c(0,1,1),
950
  	  "2"=c(1,0,1),
951
    	"3"=c(1,1,0),
952
  		c(1,1,1)
953
  		)
954
  }
955
	for (manip in manips) {
956
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
957
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
958
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
959
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
960
		} else {
961
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
962
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
963
			}
964
		}
965
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
966
		for (strain in combi) {
967
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
968
			design_strain = c(design_strain, rep(strain, length(cols)))
969
			design_rownames = c(design_rownames, cols)
970
		}
971
	}
972
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
973
	return(snep_design)
974
}
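# Minimal usage sketch with a hypothetical all_samples table; only the columns used above
# (id, strain, marker) are required. Wrapped in if (FALSE) so it is not run on source.
if (FALSE) {
  all_samples = data.frame(id = 1:12,
                           strain = rep(c("BY", "RM"), each = 3, times = 2),
                           marker = rep(c("Mnase_Seq", "H3K4me1"), each = 6),
                           stringsAsFactors = FALSE)
  # one row per sample, with 'manip' and 'strain' columns ready for DESeq
  get_design("H3K4me1", c("BY", "RM"), all_samples)
}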
975

    
976
plot_dist_samples = function(# Plot the distribution of reads.
977
### This function uses the DESeq normalization feature to qualitatively compare the read distributions.
978
strain, ##<< The strain to consider.
979
marker, ##<< The marker to consider.
980
res, ##<< Data
981
all_samples, ##<< Global list of samples.
982
NEWPLOT = TRUE ##<< If FALSE, the curves will be added to the current plot.
983
) {
984
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
985
	snepCountTable = res[,cols]
986
	snepDesign = data.frame(
987
		row.names = cols,
988
		manip = rep(marker, length(cols)),
989
		strain = rep(strain, length(cols))
990
		)
991
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
992
	sizeFactors = estimateSizeFactors(cdsFull)
993
	# print(sizeFactors[[1]])
994
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
995
	if (NEWPLOT) {
996
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
997
		NEWPLOT = FALSE
998
	}
999
	for (it in 1:length(sample_ids)) {
1000
		sample_id = sample_ids[it]
1001
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1002
	}
1003
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1004
}
1005

    
1006
analyse_design = function(# Launch DESeq methods.
1007
### This function is based on the DESeq example. It normalizes the data, fits GLM models with and without the interaction term, and compares the two models.
1008
snep_design, ##<< The design to consider.
1009
reads ##<< The data to consider.
1010
) {
1011
	snep_count_table = reads[, rownames(snep_design)]
1012
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1013
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1014
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1015

    
1016
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1017
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1018

    
1019
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1020
	return(list(fit1, fit0, snep_design, pvalsGLM))
1021
}
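# Minimal usage sketch (hypothetical objects), chaining the helpers above; requires the DESeq
# package, and assumes 'marker', 'combi', 'all_samples' and 'config' are defined as elsewhere
# in this file. Wrapped in if (FALSE) so it is not run on source.
if (FALSE) {
  snep_design = get_design(marker, combi, all_samples)
  reads = get_all_reads(marker, combi, form = "wp", config = config)
  res = analyse_design(snep_design, reads)
  pvalsGLM = res[[4]]  # p-values of the GLM comparison, as used in get_sneps below
}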
1022

    
1023

    
1024

    
1025

    
1026

    
1027

    
1028

    
1029

    
1030
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1031
### This function combines get_design, get_all_reads and analyse_design, then flags SNEPs below the p-value cutoff given by the specified FDR.
1032
marker, ##<< The marker involved.
1033
combi, ##<< The strain combination involved.
1034
form, ##<< The nucleosome form involved.
1035
all_samples, ##<< Global list of samples.
1036
FDR = 0.0001, ##<< The specified False Discovery Rate.
1037
config=NULL ##<< GLOBAL config variable
1038
) {
1039
  # PRETREAT
1040
  snep_design = get_design(marker, combi, all_samples)
1041
  reads = get_all_reads(marker, combi, form, config=config)
1042
  # RUN ANALYSE
1043
  tmp_analyse = analyse_design(snep_design, reads)
1044
  # RESULTS
1045
	fit1 = tmp_analyse[[1]]
1046
	fit0 = tmp_analyse[[2]]
1047
  k = names(fit1)
1048
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1049
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1050
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1051
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1052
	snep_design = tmp_analyse[[3]]
1053
  # print(snep_design)
1054
	thres = FDR(reads$pvalsGLM, FDR)
1055
	reads$snep_index = reads$pvalsGLM < thres
1056
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
1057
  return(reads)
1058
  },  ex=function(){
1059
    marker = "H3K4me1"
1060
    combi = c("BY", "YJM")
1061
    form = "wpunr" # "wp" | "unr" | "wpunr"
1062
    # foo = get_sneps(marker, combi, form)
1063
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1064
})
1065

    
1066

    
1067

    
1068

    
1069

    
1070

    
1071

    
1072

    
1073

    
1074

    
1075

    
1076

    
1077

    
1078

    
1079

    
1080

    
1081

    
1082

    
1083

    
1084

    
1085

    
1086

    
1087

    
1088

    
1089
ROM2ARAB = function(# Roman to Arabic pair list.
1090
### Utility to convert Roman numerals into Arabic numbers.
1091
){list(
1092
  "I" = 1,
1093
  "II" = 2,
1094
  "III" = 3,
1095
  "IV" = 4,
1096
  "V" = 5,
1097
  "VI" = 6,
1098
  "VII" = 7,
1099
  "VIII" = 8,
1100
  "IX" = 9,
1101
  "X" = 10,
1102
  "XI" = 11,
1103
  "XII" = 12,
1104
  "XIII" = 13,
1105
  "XIV" = 14,
1106
  "XV" = 15,
1107
  "XVI" = 16,
1108
  "XVII" = 17,
1109
  "XVIII" = 18,
1110
  "XIX" = 19,
1111
  "XX" = 20
1112
)}
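# e.g. ROM2ARAB()[["IV"]] returns 4; see ARAB2ROM() below for the reverse mapping.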
1113

    
1114
switch_pairlist = structure(function(# Switch a pairlist
1115
### Take a pairlist key:value and return the switched pairlist value:key.
1116
l ##<< The pairlist to switch.
1117
) {
1118
	ret = list()
1119
	for (name in names(l)) {
1120
		ret[[as.character(l[[name]])]] = name
1121
	}
1122
	ret
1123
### The switched pairlist.
1124
}, ex=function(){
1125
	l = list(key1 = "value1", key2 = "value2")
1126
	print(switch_pairlist(l))
1127
})
1128

    
1129
ARAB2ROM = function(# Arabic to Roman pair list.
1130
### Utility to convert Arabic numbers to Roman numerals.
1131
){switch_pairlist(ROM2ARAB())}
1132

    
1133

    
1134

    
1135

    
1136
c2c_extraction = function(# Extract a sub-part of the corresponding c2c file
1137
### This function gives access to a specific part of the c2c file.
1138
strain1, ##<< the key strain
1139
strain2, ##<< the target strain
1140
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1141
lower_bound=NULL, ##<< if defined, the c2c will be filtered to the part of the genome above lower_bound
1142
upper_bound=NULL, ##<< if defined, the c2c will be filtered to the part of the genome below upper_bound
1143
config=NULL##<<  GLOBAL config variable
1144
) {
1145
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1146
	# Load c2c file
1147
	if (reverse) {
1148
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1149
	} else {
1150
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1151
	}
1152
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1153
  # Filtering ungapped
1154
  c2c = c2c[c2c$V6=="-",]
1155
	# Reverse
1156
	if (reverse) {
1157
		tmp_col = c2c$V1
1158
		c2c$V1 = c2c$V7
1159
		c2c$V7 = tmp_col
1160
		tmp_col = c2c$V2
1161
		c2c$V2 = c2c$V9
1162
		c2c$V9 = tmp_col
1163
		tmp_col = c2c$V3
1164
		c2c$V3 = c2c$V10
1165
		c2c$V10 = tmp_col
1166
	}
1167
  if (!is.null(chr)) {
1168
  	if (strain1 == "BY") {
1169
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1170
  	} else if (strain1 == "RM") {
1171
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1172
  	} else if (strain1 == "YJM") {
1173
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1174
  	}
1175
  	c2c = c2c[c2c$V1 == chro_1,]
1176
    if (!is.null(lower_bound)) {
1177
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1178
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1179
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1180
    }
1181
    if (!is.null(upper_bound)) {
1182
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1183
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1184
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1185
    }
1186
  }
1187
  return(c2c)
1188
### Returns the appropriate part of the c2c file.
1189
}
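# Minimal usage sketch (hypothetical config): config$C2C_FILES must map "BY-RM" to an existing
# c2c alignment file, as assumed by the config handling above. Wrapped in if (FALSE) so it is
# not run when the file is sourced.
if (FALSE) {
  c2c = c2c_extraction("BY", "RM", chr = "4", lower_bound = 10000, upper_bound = 50000,
                       config = config)
}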
1190

    
1191
translate_cur = structure(function(# Translate coords of a genome region.
1192
### This function is used in the examples; usually you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please refer to the example.
1193
roi, ##<< Original genome region of interest.
1194
strain2, ##<< The strain in which you want the genome region of interest.
1195
config=NULL, ##<< GLOBAL config variable
1196
big_cur=NULL ##<< A region larger than roi, used to filter the c2c if needed.
1197
) {
1198
	strain1 = roi$strain_ref  
1199
  # Do something or nothing?
1200
  if (is.null(config$NEUTRAL_TRANSLATE_CUR)) {
1201
    config$NEUTRAL_TRANSLATE_CUR = FALSE
1202
  }
1203
	if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) {
1204
    roi$strain_ref = strain2
1205
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin)
1206
		return(roi)
1207
	}
1208
  
1209
	# Extract c2c file
1210
	if (!is.null(big_cur)) {
1211
  	# Dealing with big_cur
1212
		if (roi$strain_ref != big_cur$strain_ref) {
1213
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1214
    }
1215
    if (big_cur$end < big_cur$begin) {
1216
      tmp_var = big_cur$begin
1217
      big_cur$begin = big_cur$end
1218
      big_cur$end = tmp_var
1219
      big_cur$length = big_cur$end - big_cur$begin + 1
1220
    }
1221
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1222
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1223
      return(NULL)
1224
    }
1225
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1226
  } else {
1227
    # No big_cur
1228
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1229
  }
1230
  
1231
  #	Convert initial roi$chr into c2c format
1232
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1233
	begin_1 = roi$begin
1234
  end_1 = roi$end
1235
  if (reverse) {
1236
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1237
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1238
	} else {
1239
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1240
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1241
	}
1242

    
1243
	# Should-never-happen conditions ...
1244
	{
1245
		if (length(tmptransfostart$V8) == 0) {
1246
			# begin_1 is between two lines: shift begin_1 to the start of the 2nd line.
1247
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1248
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1249
  			begin_1 = min(tmp_c2c$V2)
1250
      } else {
1251
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1252
  			begin_1 = min(tmp_c2c$V3)
1253
      }
1254
			if (reverse) {
1255
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1256
			} else {
1257
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1258
			}
1259
			if (length(tmptransfostart$V8) == 0) {
1260
				if (!is.null(big_cur)) {
1261
					return(NULL)
1262
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1263
				} else {
1264
					print(tmptransfostart)
1265
					print(tmptransfostop)
1266
					stop("Never happend condition 1.")
1267
				}
1268
			}
1269
		}
1270
		if (length(tmptransfostop$V8) == 0) {
1271
			# end_1 is between two lines: shift end_1 to the end of the 2nd line.
1272
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1273
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1274
  			end_1 = max(tmp_c2c$V3)
1275
      } else {
1276
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1277
  			end_1 = max(tmp_c2c$V2)
1278
      }
1279
			if (reverse) {
1280
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1281
			} else {
1282
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1283
			}
1284
			if (length(tmptransfostop$V8) == 0) {
1285
				if (!is.null(big_cur)) {
1286
					return(NULL)
1287
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1288
				} else {
1289
					print(tmptransfostart)
1290
					print(tmptransfostop)
1291
					stop("Never happend condition 2.")
1292
				}
1293
			}
1294
		}
1295
		if (length(tmptransfostart$V8) != 1) {
1296
			# print("many start")
1297
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1298
			if (length(tmptransfostart$V8) != 1) {
1299
				print(tmptransfostart)
1300
				print(tmptransfostop)
1301
  			stop("Never happend condition 3.")
1302
			}
1303
		}
1304
		if (length(tmptransfostop$V8) != 1) {
1305
			# print("many stop")
1306
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1307
			if (length(tmptransfostop$V8) != 1) {
1308
				print(tmptransfostart)
1309
				print(tmptransfostop)
1310
  			stop("Never happend condition 4.")
1311
			}
1312
		}
1313
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1314
			print(tmptransfostart)
1315
			print(tmptransfostop)
1316
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1317
		}
1318
	}
1319
  
1320
  # Deal with strand
1321
  if (tmptransfostart$V8 == 1) {
1322
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1323
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1324
  } else {
1325
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1326
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1327
  }
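  # Worked example of the arithmetic above (illustrative numbers only, not from
  # an actual c2c file): if the matching c2c line maps a strain-1 block
  # [V2=1000, V3=1500] to a strain-2 block starting at V9=2000 on the forward
  # strand (V8 == 1), then a query position begin_1 = 1100 is translated to
  # begin_2 = 2000 + (1100 - 1000) = 2100; on the reverse strand (V8 == -1)
  # the offset is subtracted instead: begin_2 = 2000 - (1100 - 1000) = 1900.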
  # Build returned roi
  roi$strain_ref = strain2
  if (roi$strain_ref == "BY") {
    roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
  } else {
    roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
  }
  roi$begin = begin_2
  roi$end = end_2
  if (sign(roi$end - roi$begin) == 0) {
    roi$length = 1
  } else {
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
  }
  return(roi)
}, ex=function(){
  # Define a new translate_cur function...
  translate_cur = function(roi, strain2, config) {
    strain1 = roi$strain_ref
    if (strain1 == strain2) {
      return(roi)
    } else {
      stop("Here is my new translate_cur function...")
    }
  }
  # Bind it by uncommenting the following lines.
  # unlockBinding("translate_cur", as.environment("package:nm"))
  # unlockBinding("translate_cur", getNamespace("nm"))
  # assign("translate_cur", translate_cur, "package:nm")
  # assign("translate_cur", translate_cur, getNamespace("nm"))
  # lockBinding("translate_cur", getNamespace("nm"))
  # lockBinding("translate_cur", as.environment("package:nm"))
})
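
# Minimal usage sketch for translate_cur (assumes a config loaded as in the
# build_replicates example and that the c2c alignment files referenced by the
# config are available; the region coordinates below are illustrative only):
# config = fromJSON(paste(readLines("nucleominer_config.json"), collapse=""))
# roi = list(chr="8", begin=472000, end=474000, strain_ref="BY")
# roi_rm = translate_cur(roi, "RM", config=config)
# print(roi_rm)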
compute_inter_all_strain_curs = function(# Compute Common Uninterrupted Regions (CUR)
### CURs are regions that can be aligned between the genomes.
diff_allowed = 30, ##<< The maximum indel width allowed in a CUR.
min_cur_width = 4000, ##<< The minimum width of a CUR.
config = NULL ##<< GLOBAL config variable
) {

  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
    check_homogeneity(c2c)
    if (length(c2c[,1]) == 0 ) {
      stop("WARNING! Checking overlapping for a region corresponding to an empty c2c.")
    } else {
      lower_bounds = apply(t(1:nrow(c2c)), 2, function(i){l = c2c[i,]; min(l$V2, l$V3)})
      upper_bounds = apply(t(1:nrow(c2c)), 2, function(i){l = c2c[i,]; max(l$V2, l$V3)})
      tmp_index = order(lower_bounds)
      lower_bounds = lower_bounds[tmp_index]
      upper_bounds = upper_bounds[tmp_index]
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
      ov_index = which(tmp_diff < 0)
      if (length(ov_index) != 0) {
        # at least one pair of consecutive blocks overlaps
        ov_index = ov_index[1]
        print(paste("WARNING! Overlapping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
        print(c2c_corrupted)
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
      }
      return(NULL)
    }
  }

  check_homogeneity = function(sub_c2c) {
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
    tmp_signs = tmp_signs[tmp_signs != 0]
    if (sum(tmp_signs[1] != tmp_signs)) {
      print(paste("*************** ERROR, non-homogeneous region (sign)! ********************"))
      print(tmp_signs)
    }
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
    if (sum(tmp_signs2[1] != tmp_signs2)) {
      print(paste("*************** ERROR, non-homogeneous region (sign2)! ********************"))
      print(tmp_signs2)
    }
    if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
      print("*************** ERROR, non-homogeneous region chrs or V8! ********************")
    }
  }

  test_and_squeeze_rois = function(foo, config=NULL) {
    is_it_ok = function(list1, list2) {
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
      if (!ok) {
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
      }
      return (ok)
    }
    squeeze_rois = function(list1, list2) {
      rois = apply(t(1:nrow(list1)), 2, function(i){
        roi = list1[i,]
        roi2 = list2[i,]
        roi$begin = max(roi$begin, roi2$begin)
        roi$end = min(roi$end, roi2$end)
        roi$length = roi$end - roi$begin + 1
        return(roi)
      })
      return(do.call(rbind, rois))
    }
    # foo_orig = compute_inter_all_strain_curs2(config=config)
    # foo = foo_orig
    STOP = FALSE
    nb_round = 0
    while(!STOP) {
      nb_round = nb_round + 1
      print(paste("2-2 round #", nb_round, sep=""))
      fooby = translate_curs(foo, "BY", config=config)
      fooyjm = translate_curs(foo, "YJM", config=config)
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
      if (!is_it_ok(fooby, fooyjmby)) {
        print("case 1")
        foo = squeeze_rois(fooby, fooyjmby)
        next
      }
      foorm = translate_curs(foo, "RM", config=config)
      foormby = translate_curs(foorm, "BY", config=config)
      if (!is_it_ok(fooby, foormby)) {
        print("case 2")
        foo = squeeze_rois(fooby, foormby)
        next
      }
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
        print("case 3")
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
        next
      }
      foormyjm = translate_curs(foorm, "YJM", config=config)
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
      if (!is_it_ok(foorm, foormyjmrm)) {
        print("case 4")
        foo = squeeze_rois(foorm, foormyjmrm)
        next
      }
      foo = translate_curs(foo, "BY", config=config)
      STOP = TRUE
    }
    STOP = FALSE
    nb_round = 0
    while(!STOP) {
      nb_round = nb_round + 1
      print(paste("3-3 round #", nb_round, sep=""))
      fooby = translate_curs(foo, "BY", config=config)
      foobyrm = translate_curs(fooby, "RM", config=config)
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
      if (!is_it_ok(fooby, foobyrmyjmby)) {
        print("case 1")
        foo = squeeze_rois(fooby, foobyrmyjmby)
      }
      fooby = translate_curs(foo, "BY", config=config)
      foobyyjm = translate_curs(fooby, "YJM", config=config)
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
      if (!is_it_ok(fooby, foobyyjmrmby)) {
        print("case 2")
        foo = squeeze_rois(fooby, foobyyjmrmby)
        next
      }
      foo = translate_curs(foo, "BY", config=config)
      STOP = TRUE
    }
    print("end")
    return(foo)
  }
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
    c2c = c2c_extraction(strain1, strain2, config=config)
    # computing diffs
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
    # Filtering
    indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
    indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) - 1))
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
      returned_rois = NULL
      start = indexes_start[i]
      stop = indexes_stop[i]
      sub_c2c = c2c[start:stop,]
      check_homogeneity(sub_c2c)
      if (strain1 == "BY") {
        chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
      } else {
        chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
      }
      roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
      roi$length = roi$end - roi$begin + 1
      if (roi$length >= min_cur_width) {
        lower_bound = roi$begin
        upper_bound = roi$end
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
        while(!is.null(check)) {
          # print(check)
          roi1 = roi
          roi1$end = check[[1]]
          roi1$length = roi1$end - roi1$begin + 1
          if (roi1$length >= min_cur_width) {
            returned_rois = dfadd(returned_rois, roi1)
          }
          roi$begin = check[[2]]
          roi$length = roi$end - roi$begin + 1
          if (roi$length >= min_cur_width) {
            lower_bound = min(roi$begin, roi$end)
            upper_bound = max(roi$begin, roi$end)
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
          } else {
            check = NULL
            roi = NULL
          }
        }
        returned_rois = dfadd(returned_rois, roi)
      }
    })
    rois = do.call(rbind, rois)
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
    return(rois)
  }

  translate_curs = function(rois, target_strain, config) {
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
      roi = rois[i,]
      tr_roi = translate_cur(roi, target_strain, config=config)
      tmp_begin = min(tr_roi$begin, tr_roi$end)
      tmp_end = max(tr_roi$begin, tr_roi$end)
      tr_roi$begin = tmp_begin
      tr_roi$end = tmp_end
      tr_roi$length = tr_roi$end - tr_roi$begin + 1
      return(tr_roi)
    })
    tr_rois = do.call(rbind, tr_rois)
    return(tr_rois)
  }

  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
  rois = list()
  for (combi in combis) {
    strain1 = combi[1]
    strain2 = combi[2]
    print(paste(strain1, strain2))
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
    strain1 = combi[2]
    strain2 = combi[1]
    print(paste(strain1, strain2))
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)
    region1 = rois_fwd
    region2 = tr_rois_rev
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
  }
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
  return (squeezed_rois)
}
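
# Minimal usage sketch for compute_inter_all_strain_curs (assumes a config
# loaded as in the build_replicates example and the c2c alignment files it
# points to; parameter values are the defaults documented above):
# library(rjson)
# config = fromJSON(paste(readLines("nucleominer_config.json"), collapse=""))
# curs = compute_inter_all_strain_curs(diff_allowed = 30, min_cur_width = 4000, config = config)
# head(curs)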
intersect_region = function(# Return the intersection of two lists of regions.
### This function intersects each region of \emph{region1} with the overlapping regions of \emph{region2} and returns the cropped regions.
region1, ##<< Original regions.
region2 ##<< Regions to intersect.
) {
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
    roi1 = region1[i, ]
    sub_regions2 = region2[region2$chr == roi1$chr, ]
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
                                sub_regions2$begin < roi1$begin & roi1$end < sub_regions2$end
                                , ]
    if (nrow(sub_regions2) == 0) {
      print("removing a region")
      return(NULL)
    } else if (nrow(sub_regions2) > 1) {
      print("more than one region in intersect_region")
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
    } else {
      roi2 = sub_regions2[1,]
      if (roi1$begin < roi2$begin) {
        print("not the same begin")
        roi1$begin = roi2$begin
        roi1$length = roi1$end - roi1$begin + 1
      }
      if (roi1$end > roi2$end) {
        print("not the same end")
        roi1$end = roi2$end
        roi1$length = roi1$end - roi1$begin + 1
      }
      return(roi1)
    }
  })
  return(do.call(rbind, intersection))
}
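
# Toy example for intersect_region (illustrative data frames only; in the
# pipeline the regions come from get_inter_strain_rois / translate_curs):
# region1 = data.frame(chr="1", begin=100, end=500, strain_ref="BY", length=401, stringsAsFactors=FALSE)
# region2 = data.frame(chr="1", begin=300, end=800, strain_ref="BY", length=501, stringsAsFactors=FALSE)
# intersect_region(region1, region2)
# # returns the overlap 300-500 on chromosome 1, with its length updated to 201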
build_replicates = structure(function(# Stage replicates data
### This function loads into memory the data corresponding to the given experiments.
expe, ##<< A list of vectors of sample ids, one vector per replicate group.
roi, ##<< The region of interest.
only_fetch=FALSE, ##<< If TRUE, inputs are fetched but not filtered.
get_genome=FALSE, ##<< Whether to load the corresponding genome.
all_samples, ##<< Global list of samples.
config=NULL ##<< GLOBAL config variable.
) {
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_outputs=TRUE, all_samples) {
    samples = list()
    for (i in samples_ids) {
      sample = as.list(all_samples[all_samples$id==i,])
      sample$orig_roi = roi
      sample$roi = translate_cur(roi, sample$strain, config = config)
      if (get_genome) {
        # Get genome
        fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
        sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
      }
      # Get inputs
      sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
      sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
      sample$total_reads = sum(sample$inputs[,4])
      if (!only_fetch) {
        sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
      }
      # Get TF outputs for Mnase_Seq samples
      if (sample$marker == "Mnase_Seq" & get_outputs) {
        sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
        sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
        if (!only_fetch) {
          sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
        }
      }
      samples[[length(samples) + 1]] = sample
    }
    return(samples)
  }
  replicates = list()
  for(samples_ids in expe) {
    samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
    replicates[[length(replicates) + 1]] = samples
  }
  return(replicates)
}, ex = function() {
  # library(rjson)
  # library(nucleominer)
  #
  # # Read config file
  # json_conf_file = "nucleominer_config.json"
  # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
  # # Read sample file
  # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
  # # here are the sample ids in a list
  # expes = list(c(1))
  # # here is the region where we want to see the coverage
  # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
  # # it displays the coverage
  # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
  # out = watch_samples(replicates, config$READ_LENGTH,
  #       plot_coverage = TRUE,
  #       plot_squared_reads = FALSE,
  #       plot_ref_genome = FALSE,
  #       plot_arrow_raw_reads = FALSE,
  #       plot_arrow_nuc_reads = FALSE,
  #       plot_gaussian_reads = FALSE,
  #       plot_gaussian_unified_reads = FALSE,
  #       plot_ellipse_nucs = FALSE,
  #       plot_wp_nucs = FALSE,
  #       plot_wp_nuc_model = FALSE,
  #       plot_common_nucs = FALSE,
  #       height = 50)
})
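
# Note on the expected shape of the "expe" argument (illustrative sample ids):
# each element of the list is one replicate group, given as a vector of sample
# ids taken from the "id" column of all_samples, e.g.
# expes = list(c(1, 2), c(3, 4))  # two replicate groups of two samples each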
watch_samples = function(# Watching analysis of samples
### This function displays the analysis for a particular region of the genome.
replicates, ##<< Replicates, as returned by \emph{build_replicates}.
read_length, ##<< Length of the reads.
plot_ref_genome = TRUE, ##<< Plot (or not) the reference genome.
plot_arrow_raw_reads = TRUE, ##<< Plot (or not) arrows for raw reads.
plot_arrow_nuc_reads = TRUE, ##<< Plot (or not) arrows for reads associated with a nucleosome.
plot_squared_reads = TRUE, ##<< Plot (or not) reads in the squared fashion.
plot_coverage = FALSE, ##<< Plot (or not) reads in the coverage fashion.
plot_gaussian_reads = TRUE, ##<< Plot (or not) the Gaussian model of the F and R reads.
plot_gaussian_unified_reads = TRUE, ##<< Plot (or not) the Gaussian model of a nucleosome.
plot_ellipse_nucs = TRUE, ##<< Plot (or not) an ellipse for each nucleosome.
change_col = TRUE, ##<< Change the color of each nucleosome.
plot_wp_nucs = TRUE, ##<< Plot (or not) clusters of nucleosomes.
plot_fuzzy_nucs = FALSE, ##<< Plot (or not) clusters of fuzzy nucleosomes.
plot_wp_nuc_model = TRUE, ##<< Plot (or not) the Gaussian model for a cluster of nucleosomes.
plot_common_nucs = FALSE, ##<< Plot (or not) aligned (common) nucleosomes.
plot_common_unrs = FALSE, ##<< Plot (or not) unaligned nucleosomal regions (UNRs).
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non-MNase samples.
plot_chain = FALSE, ##<< Plot (or not) the chain of clustered nucleosomes across MNase samples.
plot_sample_id = FALSE, ##<< Plot (or not) the sample id for each sample.
aggregated_intra_strain_nucs = NULL, ##<< List of aggregated intra-strain nucs. If NULL, it will be computed.
aligned_inter_strain_nucs = NULL, ##<< List of aligned inter-strain nucs. If NULL, it will be computed.
height = 10, ##<< Number of reads (per million reads) allotted to each sample; graphical parameter for the y axis.
main=NULL, ##<< Main title of the produced plot.
xlab=NULL, ##<< xlab of the produced plot.
ylab="#reads (per million reads)", ##<< ylab of the produced plot.
config=NULL ##<< GLOBAL config variable
){
  returned_list = list()
  # Computing global display parameters
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
    x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
    x_max_glo = replicates[[1]][[1]]$roi[["end"]]
  } else {
    x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
    x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
  }
  base_glo = 0
  nb_rank_glo = 0
  for (samples in replicates) {
    nb_rank_glo = nb_rank_glo + length(samples)
  }
  ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
  y_min_glo = min(ylim_glo)
  y_max_glo = max(ylim_glo)
  delta_y_glo = y_max_glo - y_min_glo
  # Plot main frame
  if (is.null(xlab)) {
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
  }
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
  # Go
  replicates_wp_nucs = list()
  wp_maps = list()
  fuzzy_maps = list()
  for (replicate_rank in 1:length(replicates)) {
    # Computing replicate parameters
    nb_rank = length(samples)
    base = (replicate_rank-1) * height * nb_rank
    ylim = c(base, base + height * nb_rank)
    y_min = min(ylim)
    y_max = max(ylim)
    delta_y = y_max - y_min
    samples = replicates[[replicate_rank]]
    for (sample_rank in 1:length(samples)) {
      # computing sample parameters
      sample = samples[[sample_rank]]
      y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
      if (plot_sample_id) {
        text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
      }

      if (sample$roi[["begin"]] < sample$roi[["end"]]) {
        x_min = sample$roi[["begin"]]
        x_max = sample$roi[["end"]]
      } else {
        x_min = - sample$roi[["begin"]]
        x_max = - sample$roi[["end"]]
      }
      shift = x_min_glo - x_min
      # Plot genome sequence
      if (plot_ref_genome) {
        text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
      }
      # Plot reads
      reads = sample$inputs
      signs = sign_from_strand(reads[,3])
      if (plot_arrow_raw_reads) {
        arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
        col=1, length=0.15/nb_rank)
      }
      if (plot_squared_reads) {
        # require(plotrix)
        rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift, y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
      }
      if (plot_coverage) {
        if (length(reads[,1]) != 0) {
          # Build a step function of the coverage: each read adds a step up at
          # its start and a step down at its end, then the steps are cumulated.
          step_h = sign(x_min) * signs * reads[,4]
          step_b = sign(x_min) * reads[,2] + shift
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
          steps_x = min(step_b, step_e):max(step_b, step_e)
          steps_y = rep(0, length(steps_x))
          for (i in 1:length(step_h)) {
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] = steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] = steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
          }
          tmp_index = which(steps_y != 0)
          steps_x = steps_x[tmp_index]
          steps_y = steps_y[tmp_index]
          tmp_current_level = 0
          for (i in 1:length(steps_y)) {
            steps_y[i] = tmp_current_level + steps_y[i]
            tmp_current_level = steps_y[i]
          }
          steps_y = c(0, steps_y)
          steps_y = steps_y * 1000000/sample$total_reads
        } else {
          steps_y = c(0, 0, 0)
          steps_x = c(x_min, x_max)
        }
        # print(steps_x)
        # print(steps_y)
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
        abline(y_lev,0)
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
      }
      # Plot nucs
      if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
        nucs = sample$outputs
        if (length(nucs$center) > 0) {
          col = 1
          for (i in 1:length(nucs$center)) {
            if (change_col) {
              col = col + 1
            } else {
              col = "blue"
            }
            nuc = nucs[i,]
            involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
            involved_signs = apply(t(involved_reads[,3]), 2, function(strand) { if (strand == "F") return(1) else return(-1)})
            total_involved_reads = sum(involved_reads[,4])
            if (plot_arrow_nuc_reads) {
              arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
              col=col, length=0.15/nb_rank)
            }
            if (plot_gaussian_reads | plot_gaussian_unified_reads) {
              flatted_reads = flat_reads(involved_reads, nuc$width)
              delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
            }
            if (plot_gaussian_reads) {
              flatted_reads = flat_reads(involved_reads, nuc$width)
              delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
            }
            if (plot_gaussian_unified_reads) {
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
            }
            if (plot_ellipse_nucs) {
              # require(plotrix)
              draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
            }
          }
        } else {
          print("WARNING! No nucs to print.")
        }
      }
    }

    # Plot wp nucs
    if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
      if (samples[[1]]$marker == "Mnase_Seq") {
        if (is.null(aggregated_intra_strain_nucs)) {
          wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
        } else {
          wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
        }
      } else {
        wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
      }
      if (plot_chain) {
        tf_nucs = lapply(wp_nucs, function(nuc) {
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
            tmp_nuc = tmp_nuc[[1]]
            tmp_nuc$inputs = NULL
            tmp_nuc$original_reads = NULL
            tmp_nuc$wp = nuc$wp
            # print(tmp_nuc)
            return(tmp_nuc)
          })
          return(do.call(rbind, bar))
        })
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
        tmp_y = y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
        tmp_y_prev = tmp_y[-length(tmp_y)]
        tmp_y_next = tmp_y[-1]
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
        tmp_track = unlist(tf_nucs$track)
        tmp_track_prev = tmp_track[-length(tmp_track)]
        tmp_track_next = tmp_track[-1]
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
        if (is.null(config$TRACK_LLR_OFFSET)) {
          config$TRACK_LLR_OFFSET = 0
        }
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
        tmp_x_prev = tmp_x[-length(tmp_x)]
        tmp_x_next = tmp_x[-1]
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
        tmp_llr_inter = signif(unlist(tf_nucs$llr_score)[-1], 2)
        new_tmp_x = c()
        new_tmp_y = c()
        index_odd = 1:length(tmp_x) * 2 - 1
        index_even = (1:(length(tmp_x) - 1)) * 2
        new_tmp_x[index_odd] = tmp_x
        new_tmp_y[index_odd] = tmp_y
        new_tmp_x[index_even] = tmp_x_inter
        new_tmp_y[index_even] = tmp_y_inter
        lines(new_tmp_x, new_tmp_y, lwd=2)
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
        points(tmp_x, tmp_y, cex=4, lwd=2)
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
        if (is.null(config$LEGEND_LLR_POS)) {
          pos = 2
        } else {
          pos = config$LEGEND_LLR_POS
        }
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr)
      }

      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
        replicates_wp_nucs[[replicate_rank]] = wp_nucs
        strain = samples[[1]]$strain
        nb_tracks = length(replicates[[1]])
        # print(paste("**************"), nb_tracks)
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)

        if (plot_fuzzy_nucs) {
          fuzzy_map = fuzzy_maps[[strain]]
          if (!is.null(fuzzy_map)) {
            if (nrow(fuzzy_map) > 0) {
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)
            }
          }
        }

        if (plot_wp_nucs) {
          for (wp_nuc in wp_nucs) {
            if (wp_nuc$wp){
              rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
              if (plot_wp_nuc_model) {
                all_original_reads = c()
                for(initial_nuc in wp_nuc$nucs) {
                  all_original_reads = c(all_original_reads, initial_nuc$original_reads)
                }
                delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
                lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
              }
            }
          }
        }
      }
    }
  }

  if (plot_common_nucs) {
    if (is.null(aligned_inter_strain_nucs)) {
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
      if (!is.null(aligned_inter_strain_nucs)) {
        aligned_inter_strain_nucs$cur_index = "foo"
      }
    }

    # Plot common wp nucs
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = y_min = y_max = delta_y = list()
    for (replicate_rank in 1:length(replicates)) {
      nb_rank[[replicate_rank]] = length(samples)
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2

      samples = replicates[[replicate_rank]]
      for (sample_rank in 1:length(samples)) {
        sample = samples[[sample_rank]]
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
          x_min[[replicate_rank]] = sample$roi[["begin"]]
          x_max[[replicate_rank]] = sample$roi[["end"]]
        } else {
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
          x_max[[replicate_rank]] = - sample$roi[["end"]]
        }
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
      }
    }
    print(aligned_inter_strain_nucs)
    if (!is.null(aligned_inter_strain_nucs)) {
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
        tmp_xs = tmp_ys = c()
        for (replicate_rank in 1:length(replicates)) {
          samples = replicates[[replicate_rank]]
          strain = samples[[1]]$strain
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
        }
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
      }
    }

    if (plot_common_unrs) {
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
      roi = as.list(samples[[1]]$roi)
      cur_index = "foo"
      common_nuc_results = list()
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config)
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))
    }

  }
  return(returned_list)
}
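
# Example of how the returned list can be used when plot_coverage is TRUE
# (assumes "replicates" and "config" built as in the build_replicates example;
# the sample id 1 and the evaluated position are illustrative only):
# out = watch_samples(replicates, config$READ_LENGTH, plot_coverage = TRUE)
# cov_fun = out[["cov_1"]]   # one stepfun object per sample, keyed "cov_<id>"
# cov_fun(473000)            # coverage (per million reads) at position 473000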
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
### This function grabs the nucleosomes detected by TemplateFilter that have been rejected by aggregate_intra_strain_nucs as well-positioned nucleosomes.
wp_map, ##<< Well-positioned nucleosome map.
roi, ##<< The region of interest.
strain, ##<< The strain for which we want to extract the fuzzy map.
config=NULL ##<< GLOBAL config variable.
) {
  fuzzy_map = wp_map[wp_map$wp==0, ]
  if (nrow(fuzzy_map) > 0) {
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
    if (!is.null(fuzzy_map)) {
      fuzzy_map = union_regions(fuzzy_map)
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
    }
  }
  return(fuzzy_map)
}
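
# Usage sketch for get_intra_strain_fuzzy, mirroring the call made in
# watch_samples (assumes wp_maps, samples, strain and config defined as there):
# fuzzy_map = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)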
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
### This function aggregates the non-common wp nucs of each strain and subtracts the common wp nucs. It does not consider the size of the resulting UNR; that is taken into account in the read-count part of the pipeline.
combi, ##<< The strain combination to consider.
roi, ##<< The region of interest.
cur_index, ##<< The region of interest index.
wp_maps, ##<< Well-positioned nucleosome maps.
fuzzy_maps, ##<< Fuzzy nucleosome maps.
common_nuc_results, ##<< Common wp nuc maps.
config=NULL ##<< GLOBAL config variable
) {
  # print(cur_index)

  tmp_combi_key = paste(combi[1], combi[2], sep="_")
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]

  # print(paste("Dealing with unr from", combi[1]))
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
  tmp_wp = wp_maps[[combi[1]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Let's go!
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
      return(NULL)
    } else {
      return (index_nuc)
    }
  }))
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  tmp_unr = rbind(tmp_fuzzy, tmp_unaligned_wp[,1:4])
  if (length(tmp_unr) != 0) {
    tmp_unr = union_regions(tmp_unr)
  }
  tmp_unr_nucs_1 = tmp_unr
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
  agg_unr_1 = tmp_unr_nucs_1

  # print(paste("Dealing with unr from ", combi[2]))
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
  tmp_wp = wp_maps[[combi[2]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Let's go!
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
      return(NULL)
    } else {
      return (index_nuc)
    }
  }))
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  tmp_unr = rbind(tmp_fuzzy, tmp_unaligned_wp[,1:4])
  if (length(tmp_unr) != 0) {
    tmp_unr = union_regions(tmp_unr)
  }
  tmp_unr_nucs_2 = tmp_unr
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)

  # print("Dealing with unr from both...")
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))

  # print(paste("Dealing with wp from", combi[1]))
  tmp_wp = wp_maps[[combi[1]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Let's go!
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
      return (index_nuc)
    } else {
      return(NULL)
    }
  }))
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]

  # print(paste("Dealing with wp from", combi[2]))
  tmp_wp = wp_maps[[combi[2]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Let's go!
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
      return (index_nuc)
    } else {
      return(NULL)
    }
  }))
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
  if (nrow(wp_nucs_2) == 0) {
    tr_wp_nucs_2 = wp_nucs_2
  } else {
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)
  }

  # print("Dealing with wp from both...")
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))

  # print("Dealing with unr and wp...")
  non_inter_unr = substract_region(all_unr, all_wp)
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
  if (is.null(non_inter_unr)) { return(NULL) }
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
  min_unr_width = 75
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]

  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
  return (non_inter_unr)
}
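
# Usage sketch for get_unrs, mirroring the plot_common_unrs branch of
# watch_samples (assumes the wp_maps, fuzzy_maps and aligned_inter_strain_nucs
# objects built there; "foo" is the placeholder cur_index used in that code):
# combi = c("BY", "RM")
# common_nuc_results = list()
# common_nuc_results[["BY_RM"]] = aligned_inter_strain_nucs
# unrs = get_unrs(combi, roi, "foo", wp_maps, fuzzy_maps, common_nuc_results, config = config)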