# get_content = function(# Get content from cache
# ### Access to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
# obj, ##<< The object whose content we want.
# ... ##<< Parameters that will be passed to my_read
# ) {
#   UseMethod("get_content", obj)
#   ### Returns the cached content of the current object.
# }
#
# get_content.default = structure( function(# Get content from cache
# ### Access to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
# obj, ##<< The object whose content we want.
# ... ##<< Parameters that will be passed to my_read
# ) {
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
#     NM_CACHE <<- list()
#   }
#   if (is.null(NM_CACHE[[obj$filename]])) {
#     print(paste("Loading file ",obj$filename, sep=""))
#     tmp_content = my_read(obj, ...)
#     print("affect it...")
#     NM_CACHE[[obj$filename]] <<- tmp_content
#     print("done.")
#   }
#   return(NM_CACHE[[obj$filename]])
# ### Returns the cached content of the current object.
# }, ex=function(){
#   # Create a dataframe
#   df = NULL
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
#   # Dump it into a tmp file
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
#   # Load it into the cache.
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
#   class(cached_content_object) = "table"
#   # The first time, it is loaded into the cache
#   print(get_content(cached_content_object))
#   # The second time, it is read from the cache
#   print(get_content(cached_content_object))
#
# })
#
# my_read = function(
# ### Abstract my_read function.
#   obj, ...) {
#   UseMethod("my_read", obj)
# }
#
# my_read.default = function(
# ### Default my_read function.
#   obj, ...){
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
# }
#
# my_read.fasta = function(
# ### my_read function for fasta files.
#   obj, ...){
#   # require(seqinr)
#   return(read.fasta(obj$filename, ...))
# }
#
# my_read.table = function(
# ### my_read function for table files.
#   obj, ...){
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
#     return(read.table(file=gzfile(obj$filename), ...))
#   } else {
#     return(read.table(file=obj$filename, ...))
#   }
# }
#
# my_read.cvs = function(
# ### my_read function for csv files.
#   obj, ...){
#   return(read.csv(file=obj$filename, ...))
# }

FDR = structure(function# False Discovery Rate
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See the Benjamini & Hochberg 1995 paper.
##author<< Gael Yvert,
(
x, ##<< A vector x of independent p-values.
FDR ##<< The specified FDR.
) {
  x <- sort(na.omit(x))
  N = length(x)
  i = 1;
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
  if (i == 1)
    return (NA)
  else
    return( x[i-1] )
### Returns the corresponding cutoff.
}, ex=function(){
  print("example")
})
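
# Illustrative sketch (not part of the original sources): calling FDR() on a
# mixture of null and signal p-values. Values below the returned cutoff are
# called significant at the requested false discovery rate.
#   pvals = c(runif(900), rbeta(100, 1, 1000))   # ~900 null and ~100 small signal p-values
#   cutoff = FDR(pvals, 0.05)
#   sum(pvals <= cutoff, na.rm=TRUE)             # number of calls at 5% FDR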

llr_score_nvecs = structure(function # Likelihood ratio
### Compute the log likelihood ratio of two or more sets of values.
(
  xs ##<< A list of vectors.
) {
  l = length(xs)
  if (l < 1) {
    return(NA)
  }
  if (l == 1) {
    return(1)
  }
  sumllX = 0
  for (i in 1:l) {
    x = xs[[i]]
    if (length(x) <= 1) {
      return(NA)
    }
    meanX = mean(x)
    sdX = sd(x)
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
    sumllX = sumllX + llX
  }
  meanXglo = mean(unlist(xs))
  sdXglo = sd(unlist(xs))
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
  ratio = sumllX - llXYZ
  return(ratio)
### Returns the log likelihood ratio.
}, ex=function(){
  # LOD score for 2 sets of values
  mean1=5; sd1=2; card1 = 200
  mean2=6; sd2=3; card2 = 250
  x1 = rnorm(card1, mean1, sd1)
  x2 = rnorm(card2, mean2, sd2)
  min = floor(min(c(x1,x2)))
  max = ceiling(max(c(x1,x2)))
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
  llr_score_nvecs(list(x1,x2))
})
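
# Illustrative sketch (not part of the original sources): the ratio stays small
# when both vectors come from the same distribution and grows when they differ.
#   set.seed(1)
#   llr_score_nvecs(list(rnorm(100, 0, 1), rnorm(100, 0, 1)))   # small value
#   llr_score_nvecs(list(rnorm(100, 0, 1), rnorm(100, 5, 1)))   # much larger value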

dfadd = structure(function# Add a list to a dataframe.
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
  (df, ##<< A dataframe
  l ##<< A list
  ) {
  if (is.null(df)) {
    df = data.frame(l,stringsAsFactors=FALSE)
  } else {
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
  }
  return(df)
### Return the dataframe \emph{df}.
}, ex=function(){
  ## Here the dataframe is NULL
  df = NULL
  print(df)

  # Initialize df
  df = dfadd(df, list(key1 = "value1", key2 = "value2"))
  print(df)

  # Adding elements to df
  df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
  print(df)
})


sign_from_strand = function(
### Get the sign of a strand.
strands) {
  apply(t(strands), 2, function(strand) { if (strand == "F") return(1) else return(-1)})
### If the strand is forward ("F"), returns 1; otherwise returns -1.
}
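
# Illustrative sketch (not part of the original sources):
#   sign_from_strand(c("F", "R", "F"))   # returns c(1, -1, 1)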

flat_reads = function(
### Extract read coordinates from a TemplateFilter input sequence.
reads, ##<< TemplateFilter input reads
nuc_width ##<< Width used to shift F and R reads.
) {
  F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
  R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
  flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads)))
  return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
### Returns a list of F reads, R reads and joint/shifted F and R reads.
}
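
# Illustrative sketch (not part of the original sources): flatten a tiny set of
# TemplateFilter input reads; F reads are shifted by +nuc_width/2 and R reads by
# -nuc_width/2, so both strands converge on the nucleosome center.
#   reads = data.frame(V1=c(1, 1), V2=c(100, 260), V3=c("F", "R"), V4=c(2, 3),
#                      stringsAsFactors=FALSE)
#   flat_reads(reads, 160)   # F: c(100,100); R: c(260,260,260); pooled: rep(180, 5)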

filter_tf_outputs = function(# Filter TemplateFilter outputs
### This function filters TemplateFilter outputs according not only to the properties of the observed genome area, but also to correlation and overlap thresholds.
tf_outputs, ##<< TemplateFilter outputs.
chr, ##<< Chromosome observed, here chr is an integer.
x_min, ##<< Coordinate of the first bp observed.
x_max, ##<< Coordinate of the last bp observed.
nuc_width = 160, ##<< Nucleosome width.
ol_bp = 59, ##<< Overlap threshold.
corr_thres = 0.5 ##<< Correlation threshold.
) {
  if (x_min < 0) {
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <= -x_min,]
  } else {
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
  }
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
  i = 1
  while (i <= length(tf_outputs[,1])) {
    lb = tf_outputs[i,]$lower_bound
    ub = tf_outputs[i,]$upper_bound
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
    i = i+1
  }
  return(tf_outputs)
### Returns the filtered TemplateFilter outputs.
}
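
# Illustrative sketch (not part of the original sources): keep calls of
# chromosome 1 lying in [1000, 2000] with correlation.score >= 0.5, then drop
# calls overlapping a better-scoring call by more than ol_bp base pairs.
#   tf_outputs = data.frame(chr="chr1", center=c(1200, 1260, 1700), width=150,
#                           correlation.score=c(0.9, 0.6, 0.4))
#   filter_tf_outputs(tf_outputs, 1, 1000, 2000)   # only the 0.9 call survives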

filter_tf_inputs = function(# Filter TemplateFilter inputs
### This function filters TemplateFilter inputs according to the properties of the observed genome area. It takes into account reads that are at the frontier of this area and the strand of these reads.
inputs, ##<< TF inputs to be filtered.
chr, ##<< Chromosome observed, here chr is an integer.
x_min, ##<< Coordinate of the first bp observed.
x_max, ##<< Coordinate of the last bp observed.
nuc_width = 160, ##<< Nucleosome width.
only_f = FALSE, ##<< Filter only F reads.
only_r = FALSE, ##<< Filter only R reads.
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
) {
  if (only_f) {
    inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
  } else if (only_r) {
    inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
  } else {
    inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
  }
  if (!filter_for_coverage) {
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
  }
  return(inputs)
### Returns the filtered inputs.
}


get_comp_strand = function(
### Compute the complementary strand.
strand ##<< The original strand.
) {
  apply(t(strand),2, function(n){
    if (n=="a") {return("t")}
    if (n=="t") {return("a")}
    if (n=="c") {return("g")}
    if (n=="g") {return("c")}
  })
### Returns the complementary strand.
}
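
# Illustrative sketch (not part of the original sources):
#   get_comp_strand(c("a", "t", "g", "c"))   # returns c("t", "a", "c", "g")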

aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
### This function aggregates nucleosomes over replicated samples. It uses the TemplateFilter output of each sample as a replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adjacent nucleosomes are compared two by two. The comparison is based on a log likelihood ratio score, and its outcome is either the merging or the separation of the adjacent nucleosomes. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster owns an attribute \emph{wp} for "well positioned". This attribute is set to \emph{TRUE} if the cluster is composed of exactly one nucleosome of each sample.
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
llr_thres=20, ##<< Log likelihood ratio threshold.
coord_max=20000000 ##<< A value too large to be a coordinate of a nucleosome lower bound.
){
  end_of_tracks = function(tracks) {
    if (length(tracks) == 0) {
      return(TRUE)
    }
    for (lower_bound in tracks) {
      if(!is.na(lower_bound)) {
        if (lower_bound < coord_max) {
          return(FALSE)
        }
      }
    }
    return(TRUE)
  }
  store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
    if (nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
      new_cluster$wp = TRUE
      center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
      if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
        clusters[[length(clusters) + 1]] = new_cluster
        # print(new_cluster)
      }
    } else {
      new_cluster$wp = FALSE
      center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
      if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
        clusters[[length(clusters) + 1]] = new_cluster
      }
    }
    return(clusters)
  }
  strain = samples[[1]]$strain
  llr_scores = c()
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
  max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
  # compute clusters
  clusters = list()
  cluster_contents = list()
  # Init reader
  indexes = c()
  track_readers = c()
  current_nuc = NULL
  llr_score = llr_thres + 1
  # Read nucs from TF outputs
  tf_outs = list()
  i = 1
  for (sample in samples) {
    tf_outs[[i]] = sample$outputs
    tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
    indexes[i] = 1
    if (is.na(tf_outs[[i]][indexes[i],]$center)) {
      track_readers[i] = coord_max
    } else {
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
    }
    i = i+1
  }
  # print(track_readers)
  new_cluster = NULL
  nb_nucs_in_cluster = 0
  nuc_from_track = c()
  for (i in 1:length(tf_outs)){
    nuc_from_track[i] = FALSE
  }
  # Start clustering
  while (!end_of_tracks(track_readers)) {
    new_center = min(track_readers)
    current_track = which(track_readers == new_center)[1]
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
    new_nuc$chr = substr(new_nuc$chr,4,1000000L)
    new_nuc$inputs = samples[[current_track]]$inputs
    new_nuc$chr = samples[[current_track]]$roi$chr
    new_nuc$track = current_track

    new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
    flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
    new_nuc$original_reads = flatted_reads[[3]]

    new_upper_bound = new_nuc$upper_bound

    if (!is.null(current_nuc)) {
      llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
      llr_scores = c(llr_scores,llr_score)
    }
    # print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
    if (is.na(llr_score)) {
      llr_score = llr_thres + 1
    }
    # Store llr_score
    new_nuc$llr_score = llr_score
    if (llr_score < llr_thres) {
      # aggregate to current cluster
      #   update bound
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
        new_cluster$upper_bound = new_nuc$upper_bound
      }
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
        new_cluster$lower_bound = new_nuc$lower_bound
      }
      #   add nucleosome to current cluster
      nuc_from_track[current_track] = TRUE
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
      new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
    } else {
      if (!is.null(new_cluster)) {
        # store old cluster
        clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, length(tf_outs), min_nuc_center, max_nuc_center)
      }
      # Reinit current cluster composition stuff
      nb_nucs_in_cluster = 0
      nuc_from_track = c()
      for (i in 1:length(tf_outs)){
        nuc_from_track[i] = FALSE
      }
      # create new cluster
      new_cluster = list(lower_bound=new_nuc$lower_bound, upper_bound=new_nuc$upper_bound, chr=new_nuc$chr, strain_ref=strain, nucs=list())
      # update upper bound
      current_upper_bound = new_upper_bound
      # add nucleosome to current cluster
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
      nuc_from_track[current_track] = TRUE
      new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
    }

    current_nuc = new_nuc

    # update indexes
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
      indexes[current_track] = indexes[current_track] + 1
      # update track
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
    } else {
      # update track
      track_readers[current_track] = coord_max
    }
  }
  # store last cluster
  if (!is.null(new_cluster)) {
    # store old cluster
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, length(tf_outs), min_nuc_center, max_nuc_center)
  }
  return(list(clusters, llr_scores))
### Returns a list of clustered nucleosomes and all computed llr scores.
}, ex=function(){
  # Dealing with a region of interest
  roi = list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301))
  samples = list()
  for (i in 1:3) {
    # Create TF output
    tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
    outputs = dfadd(NULL,tf_nuc)
    outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
    # Generate corresponding reads
    nb_reads = round(runif(1,170,230))
    reads = round(rnorm(nb_reads, tf_nuc$center,20))
    u_reads = sort(unique(reads))
    strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
    counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
    shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
    u_reads = u_reads + shifts
    inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
                             "V2" = u_reads,
                             "V3" = strands,
                             "V4" = counts), stringsAsFactors=FALSE)
    samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
  }
  print(aggregate_intra_strain_nucs(samples))
})

flat_aggregated_intra_strain_nucs = function(# Flatten aggregate_intra_strain_nucs output
### This function builds a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function.
partial_strain_maps, ##<< The output of the aggregate_intra_strain_nucs function.
cur_index ##<< The index of the roi involved.
) {
  if (length(partial_strain_maps) == 0 ){
    print(paste("Empty partial_strain_maps for roi", cur_index, "and current strain."))
    tmp_strain_maps = list()
  } else {
    tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
      tmp_nuc = partial_strain_maps[[i]]
      tmp_nuc_as_list = list()
      tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
      tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
      tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
      tmp_nuc_as_list[["cur_index"]] = cur_index
      tmp_nuc_as_list[["index_nuc"]] = i
      tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
      all_original_reads = c()
      for (j in 1:length(tmp_nuc$nucs)) {
        all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
      }
      tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
      tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
      if (tmp_nuc$wp) {
        tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
        tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
      } else {
        tmp_nuc_as_list[["llr_1"]] = NA
        tmp_nuc_as_list[["llr_2"]] = NA
      }
      return(tmp_nuc_as_list)
    })
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
  }
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
### Returns a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function.
}
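
# Illustrative sketch (not part of the original sources): flatten the clusters
# computed by aggregate_intra_strain_nucs() for the roi of index 1, with
# 'samples' built as in the example of aggregate_intra_strain_nucs above.
#   partial_strain_maps = aggregate_intra_strain_nucs(samples)[[1]]
#   flat_aggregated_intra_strain_nucs(partial_strain_maps, 1)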

align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
### This function aligns nucs between two strains for a given genome region.
replicates, ##<< Set of replicates, ideally 3 per strain.
wp_nucs_strain_ref1=NULL, ##<< List of aggregated nucleosomes for strain 1. If NULL, this list will be computed.
wp_nucs_strain_ref2=NULL, ##<< List of aggregated nucleosomes for strain 2. If NULL, this list will be computed.
corr_thres=0.5, ##<< Correlation threshold.
llr_thres=100, ##<< LOD cutoff.
config=NULL, ##<< GLOBAL config variable
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
) {
  if (length(replicates) < 2) {
    stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
  } else if (length(replicates) > 2) {
    print("WARNING, align_inter_strain_nucs will use the first 2 sets of replicates as inputs.")
  }
  common_nuc = NULL
  llr_scores = c()
  chr = replicates[[1]][[1]]$roi$chr
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
  max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
  strain_ref1 = replicates[[1]][[1]]$strain
  strain_ref2 = replicates[[2]][[1]]$strain
  big_cur = replicates[[1]][[1]]$roi
  orig_big_cur = replicates[[1]][[1]]$orig_roi
  if(big_cur$end - big_cur$begin < 0) {
    tmp_begin = big_cur$begin
    big_cur$begin = big_cur$end
    big_cur$end = tmp_begin
  }
  # GO!
  if (is.null(wp_nucs_strain_ref1)) {
    wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
  }
  if (is.null(wp_nucs_strain_ref2)) {
    wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
  }
  lws = c()
  ups = c()
  for (na in wp_nucs_strain_ref2) {
    lws = c(lws, na$lower_bound)
    ups = c(ups, na$upper_bound)
  }

  print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
  roi_strain_ref1 = NULL
  roi_strain_ref2 = NULL
  if (length(wp_nucs_strain_ref1) > 0) {
    for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
      # print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
      nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
      # Filtering on Well Positioned
      if (nuc_strain_ref1$wp) {
        roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
        roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
        if (!is.null(roi_strain_ref2)){
          # LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
          lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
          upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
          matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
          for (index_nuc_strain_ref2 in matching_nas) {
            nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
            # Filtering on Well Positioned
            nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
            if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
                nuc_strain_ref2$wp) {
              # Filtering on correlation score and collecting reads
              SKIP = FALSE
              # TODO: Couldn't this for loop be done before working on strain_ref2?
              reads_strain_ref1 = c()
              for (nuc in nuc_strain_ref1$nucs){
                reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
                if (nuc$corr < corr_thres) {
                  SKIP = TRUE
                }
              }
              reads_strain_ref2 = c()
              for (nuc in nuc_strain_ref2$nucs){
                reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
                if (nuc$corr < corr_thres) {
                  SKIP = TRUE
                }
              }
              # Filtering on correlation score
              if (!SKIP) {
                # translation of reads into strain 2 coords
                diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
                reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
                llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
                llr_scores = c(llr_scores, llr_score)
                # Filtering on LOD score
                if (llr_score < llr_thres) {
                  tmp_nuc = list()
                  # strain_ref1
                  tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
                  tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
                  tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
                  tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
                  tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
                  tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
                  tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
                  # strain_ref2
                  tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
                  tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
                  tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
                  tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
                  tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
                  tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
                  tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
                  # common
                  tmp_nuc[["llr_score"]] = signif(llr_score,5)
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
                  tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
                  common_nuc = dfadd(common_nuc, tmp_nuc)
                }
              }
            }
          }
        } else {
          print("WARNING! No roi for strain ref 2.")
        }
      }
    }

    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] == common_nuc[,1:3][-1,] ,1,sum) == 3)
      to_remove_list = c()
      for (i in 1:length(index_redundant)) {
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
          to_remove = index_redundant[i]
        } else {
          to_remove = index_redundant[i] + 1
        }
        to_remove_list = c(to_remove_list, to_remove)
      }
      common_nuc = common_nuc[-to_remove_list,]
    }

    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
      to_remove_list = c()
      for (i in 1:length(index_redundant)) {
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
          to_remove = index_redundant[i]
        } else {
          to_remove = index_redundant[i] + 1
        }
        to_remove_list = c(to_remove_list, to_remove)
      }
      common_nuc = common_nuc[-to_remove_list,]
    }

    return(list(common_nuc, llr_scores))
  } else {
    print("WARNING, no nucs for strain_ref1.")
    return(NULL)
  }
### Returns a list of aligned nucleosomes, and all computed llr scores.
}, ex=function(){

  # Define new translate_cur function...
  translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
    return(roi)
  }
  # Rebind it in the nucleominer package and namespace.
  unlockBinding("translate_cur", as.environment("package:nucleominer"))
  unlockBinding("translate_cur", getNamespace("nucleominer"))
  assign("translate_cur", translate_cur, "package:nucleominer")
  assign("translate_cur", translate_cur, getNamespace("nucleominer"))
  lockBinding("translate_cur", getNamespace("nucleominer"))
  lockBinding("translate_cur", as.environment("package:nucleominer"))

  # Dealing with a region of interest
  roi = list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
  roi2 = translate_cur(roi, roi$strain_ref1)
  replicates = list()
  for (j in 1:2) {
    samples = list()
    for (i in 1:3) {
      # Create TF output
      tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
      outputs = dfadd(NULL,tf_nuc)
      outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
      # Generate corresponding reads
      nb_reads = round(runif(1,170,230))
      reads = round(rnorm(nb_reads, tf_nuc$center,20))
      u_reads = sort(unique(reads))
      strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
      counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
      shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
      u_reads = u_reads + shifts
      inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
                               "V2" = u_reads,
                               "V3" = strands,
                               "V4" = counts), stringsAsFactors=FALSE)
      samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
    }
    replicates[[length(replicates) + 1]] = samples
  }
  print(align_inter_strain_nucs(replicates))
})

fetch_mnase_replicates = function(# Prefetch data
### Fetch and filter inputs and outputs per region of interest. Organize them per replicate.
strain, ##<< The strain for which we want MNase replicates.
roi, ##<< Region of interest.
all_samples, ##<< Global list of samples.
config=NULL, ##<< GLOBAL config variable
only_fetch=FALSE, ##<< If TRUE, only fetch, without filtering. It is used to load sample files into memory before forking.
get_genome=FALSE, ##<< If TRUE, load the corresponding genome sequence.
get_ouputs=TRUE ##<< If TRUE, also get the corresponding TF output files.
) {
  samples=list()
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
  for (i in samples_ids) {
    sample = as.list(all_samples[all_samples$id==i,])
    sample$orig_roi = roi
    sample$roi = translate_cur(roi, sample$strain, config = config)
    if (get_genome) {
      # Get genome
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
    }
    # Get inputs
    sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
    sample$total_reads = sum(sample$inputs[,4])
    if (!only_fetch) {
      sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
    }
    # Get TF outputs for Mnase_Seq samples
    if (sample$marker == "Mnase_Seq" & get_ouputs) {
      sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
      if (!only_fetch) {
        sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
      }
    }
    samples[[length(samples) + 1]] = sample
  }
  return(samples)
}

substract_region = function(# Subtract from a list of regions another list of regions that intersects it.
### This function embeds a recursive part, which occurs when a subtracted region splits an original region in two.
region1, ##<< Original regions.
region2 ##<< Regions to subtract.
) {
  rec_substract_region = function(region1, region2) {
    non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
      cur_fuzzy = region1[i,]
      inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
      if (length(inter_wp[,1]) > 0) {
        ret = c()
        for (j in 1:length(inter_wp[,1])) {
          cur_wp = inter_wp[j,]
          if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
            # remove cur_fuzzy
            ret = c()
            break
          } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
            # crop fuzzy
            cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
            ret = cur_fuzzy
          } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
            # crop fuzzy
            cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
            ret = cur_fuzzy
          } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
            # split fuzzy
            tmp_ret_fuzzy_1 = cur_fuzzy
            tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
            tmp_ret_fuzzy_2 = cur_fuzzy
            tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
            ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
            # print(ret)
            # ret = cur_fuzzy
            break
          } else {
            stop("WARNING NO ADAPTED CASE!")
          }
        }
        return(ret)
      } else {
        return(cur_fuzzy)
      }
    })
  }
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
  tmp_ulist = unlist(non_inter_fuzzy)
  tmp_names = names(tmp_ulist)[1:4]
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
  names(non_inter_fuzzy) = tmp_names
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
  return(non_inter_fuzzy)
}
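
# Illustrative sketch (not part of the original sources): subtracting a region
# that falls inside a larger one splits the larger region in two.
#   region1 = data.frame(chr=1, lower_bound=100, upper_bound=400, cur_index=1)
#   region2 = data.frame(chr=1, lower_bound=200, upper_bound=250, cur_index=1)
#   substract_region(region1, region2)   # two rows: [100, 199] and [251, 400]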

union_regions = function(# Aggregate regions that intersect one another.
### This function is based on a sort of the lower bounds to detect regions that intersect: each lower bound is compared with the upper bound of the previous item. It embeds a while loop that breaks when the regions list becomes stable.
regions ##<< The regions to be aggregated.
) {
  if (is.null(regions)) {return(regions)}
  if (nrow(regions) == 0) {return(regions)}
  old_length = length(regions[,1])
  new_length = 0

  while (old_length != new_length) {
    regions = regions[order(regions$lower_bound), ]
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
    vec_end_1 = which(regions$stop)
    if (length(vec_end_1) == 0) {
      vec_end_1 = c(length(regions$stop))
    }
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
      vec_end_1 = c(vec_end_1, length(regions$stop))
    }
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
      chr = regions$chr[vec_beg_1[i]]
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
      cur_index = regions$cur_index[vec_beg_1[i]]
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
    })
    union = collapse_regions(union)
    old_length = length(regions[,1])
    new_length = length(union[,1])
    regions = union
  }
  return(union)
}
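
# Illustrative sketch (not part of the original sources): overlapping or
# book-ended regions are merged into a single region.
#   regions = data.frame(chr=1, lower_bound=c(100, 180, 500),
#                        upper_bound=c(200, 300, 600), cur_index=1)
#   union_regions(regions)   # two rows: [100, 300] and [500, 600]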

# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
# ### It is based on the common wp nucs index on nucs and region.
# strain_maps, ##<< Nuc maps.
# cur_index, ##<< The region of interest index.
# tmp_common_nucs, ##<< The list of wp nucs.
# strain ##<< The strain to consider.
# ){
#   fuzzy_nucs = strain_maps[[strain]]
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
#   return(fuzzy_nucs)
# }


translate_regions = function(# Translate a list of regions from a strain ref to another.
### This function is an elaborated call to translate_cur.
regions, ##<< Regions to be translated.
combi, ##<< Combination of strains.
cur_index, ##<< The region of interest index.
config=NULL, ##<< GLOBAL config variable
roi ##<< The region of interest.
) {
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
    big_cur = roi
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
    if (is.null(trs_tmp_regions_ref2)) {
      return(NULL)
    } else {
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
    }
  })

  return(collapse_regions(tr_regions))
}


collapse_regions = function(# Reformat an "apply-manipulated" list of regions
### Util to reformat an "apply-manipulated" list of regions.
regions ##<< A list of regions.
) {
  if (is.null(regions)) {
    return(NULL)
  } else {
    regions = do.call(rbind, regions)
    regions$chr = as.character(regions$chr)
    regions$chr = as.numeric(regions$chr)
    regions$lower_bound = as.numeric(regions$lower_bound)
    regions$upper_bound = as.numeric(regions$upper_bound)
    regions = regions[order(regions$lower_bound),]
    return(regions)
  }
}

crop_fuzzy = function(# Crop bounds of regions according to the region of interest bounds
### This function is no longer necessary since the "big_cur" bug in the translate_cur function was removed.
tmp_fuzzy_nucs, ##<< The regions to be cropped.
roi, ##<< The region of interest.
strain, ##<< The strain to consider.
config=NULL ##<< GLOBAL config variable
) {
  tr_roi = translate_cur(roi, strain, config = config)
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
  }
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
  }
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
  }
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
  }
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
  return(tmp_fuzzy_nucs)
}


get_all_reads = function(# Retrieve reads
### Retrieve reads for a given marker, strain combination and nucleosome form.
marker, ##<< The marker to consider.
combi, ##<< The strain combination to consider.
form="wp", ##<< The nuc form to consider.
config=NULL ##<< GLOBAL config variable
) {
  all_reads = NULL
  for (manip in c("Mnase_Seq", marker)) {
    if (form == "unr") {
      out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
      tmp_res = read.table(file=out_filename, header=TRUE)
      tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
      tmp_res$form = form
    } else if (form == "wp") {
      out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
      tmp_res = read.table(file=out_filename, header=TRUE)
      tmp_res$form = form
    } else if (form == "wpunr") {
      out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
      tmp_res = read.table(file=out_filename, header=TRUE)
      tmp_res$form = "wp"
      out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
      tmp_res2 = read.table(file=out_filename, header=TRUE)
      tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
      tmp_res2$form = "unr"
      tmp_res = rbind(tmp_res, tmp_res2)
    }
    if (is.null(all_reads)) {
      all_reads = tmp_res[,c(1:9,length(tmp_res))]
    }
    tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
    all_reads = cbind(all_reads, tmp_res)
  }
  return(all_reads)
}


get_design = function(# Build the design for DESeq
### This function builds the design according to sample properties.
marker, ##<< The marker to consider.
combi, ##<< The strain combination to consider.
all_samples ##<< Global list of samples.
) {
  off1 = 0
  off2 = 0
  manips = c("Mnase_Seq", marker)
  design_rownames = c()
  design_manip = c()
  design_strain = c()
  off2index = function(off) {
    switch(toString(off),
      "1"=c(0,1,1),
      "2"=c(1,0,1),
      "3"=c(1,1,0),
      c(1,1,1)
      )
  }
  for (manip in manips) {
    tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) & all_samples$marker == manip), ]
    tmp_samples = tmp_samples[order(tmp_samples$strain), ]
    if (manip == "H3K4me1" & (off1 != 0 & off2 == 0)) {
      tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
    } else {
      if (manip != "Mnase_Seq" & (off1 != 0 | off2 != 0)) {
        tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
      }
    }
    design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
    for (strain in combi) {
      cols = apply(t(tmp_samples[ (tmp_samples$strain == strain & tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
      design_strain = c(design_strain, rep(strain, length(cols)))
      design_rownames = c(design_rownames, cols)
    }
  }
  snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
  return(snep_design)
}
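
# Illustrative sketch (not part of the original sources): build a design for a
# hypothetical sample table with 2 MNase-Seq and 2 H3K4me1 samples per strain.
# The result is an 8-row dataframe with columns 'manip' and 'strain', and row
# names such as "BY_Mnase_Seq_1".
#   all_samples = data.frame(id=1:8,
#                            strain=rep(c("BY", "BY", "RM", "RM"), 2),
#                            marker=rep(c("Mnase_Seq", "H3K4me1"), each=4),
#                            stringsAsFactors=FALSE)
#   get_design("H3K4me1", c("BY", "RM"), all_samples)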

plot_dist_samples = function(# Plot the distribution of reads.
### This function uses the DESeq normalization feature to qualitatively compare the distributions.
strain, ##<< The strain to consider.
marker, ##<< The marker to consider.
res, ##<< Data
all_samples, ##<< Global list of samples.
NEWPLOT = TRUE ##<< If FALSE, the curves will be added to the current plot.
) {
  cols = apply(t(all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
  snepCountTable = res[,cols]
  snepDesign = data.frame(
    row.names = cols,
    manip = rep(marker, length(cols)),
    strain = rep(strain, length(cols))
    )
  cdsFull = newCountDataSet(snepCountTable, snepDesign)
  sizeFactors = estimateSizeFactors(cdsFull)
  # print(sizeFactors[[1]])
  sample_ids = all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id
  if (NEWPLOT) {
    plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
    NEWPLOT = FALSE
  }
  for (it in 1:length(sample_ids)) {
    sample_id = sample_ids[it]
    lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
  }
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
}


analyse_design = function(# Launch DESeq methods.
### This function is based on the DESeq example. It normalizes the data, fits them to a GLM model with and without the interaction term and compares the two models.
snep_design, ##<< The design to consider.
reads ##<< The data to consider.
) {
  snep_count_table = reads[, rownames(snep_design)]
  cdsFull = newCountDataSet(snep_count_table, snep_design)
  cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
  fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)

  cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
  fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)

  pvalsGLM = nbinomGLMTest( fit1, fit0 )
  return(list(fit1, fit0, snep_design, pvalsGLM))
}

get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
### This function uses get_design, get_all_reads and analyse_design, then applies an FDR cutoff to the GLM p-values to call SNEPs.
marker, ##<< The marker involved.
combi, ##<< The strain combination involved.
form, ##<< The nuc form involved.
all_samples, ##<< Global list of samples.
config=NULL ##<< GLOBAL config variable
) {
  # PRETREAT
  snep_design = get_design(marker, combi, all_samples)
  reads = get_all_reads(marker, combi, form, config=config)
  # RUN ANALYSIS
  tmp_analyse = analyse_design(snep_design, reads)
  # RESULTS
  fit1 = tmp_analyse[[1]]
  fit0 = tmp_analyse[[2]]
  k = names(fit1)
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
  reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
  snep_design = tmp_analyse[[3]]
  # print(snep_design)
  fdr = 0.0001
  thres = FDR(reads$pvalsGLM, fdr)
  reads$snep_index = reads$pvalsGLM < thres
  print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
  return(reads)
  }, ex=function(){
    marker = "H3K4me1"
    combi = c("BY", "YJM")
    form = "wpunr" # "wp" | "unr" | "wpunr"
    # foo = get_sneps(marker, combi, form)
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
})

ROM2ARAB = function(# Roman to Arabic pair list.
### Util to convert Roman to Arabic
){list(
  "I" = 1,
  "II" = 2,
  "III" = 3,
  "IV" = 4,
  "V" = 5,
  "VI" = 6,
  "VII" = 7,
  "VIII" = 8,
  "IX" = 9,
  "X" = 10,
  "XI" = 11,
  "XII" = 12,
  "XIII" = 13,
  "XIV" = 14,
  "XV" = 15,
  "XVI" = 16,
  "XVII" = 17,
  "XVIII" = 18,
  "XIX" = 19,
  "XX" = 20
)}

switch_pairlist = structure(function(# Switch a pairlist
### Take a pairlist key:value and return the switched pairlist value:key.
l ##<< The pairlist to switch.
) {
  ret = list()
  for (name in names(l)) {
    ret[[as.character(l[[name]])]] = name
  }
  ret
### The switched pairlist.
}, ex=function(){
  l = list(key1 = "value1", key2 = "value2")
  print(switch_pairlist(l))
})


ARAB2ROM = function(# Arabic to Roman pair list.
### Util to convert Arabic to Roman
){switch_pairlist(ROM2ARAB())}
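
# Illustrative sketch (not part of the original sources):
#   ROM2ARAB()[["IV"]]   # 4
#   ARAB2ROM()[["4"]]    # "IV"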

c2c_extraction = function(# Extract a sub-part of the corresponding c2c file
### This function allows access to a specific part of the c2c file.
strain1, ##<< The key strain.
strain2, ##<< The target strain.
chr=NULL, ##<< If defined, the c2c will be filtered according to the chromosome value.
lower_bound=NULL, ##<< If defined, the c2c will be filtered for the part of the genome above lower_bound.
upper_bound=NULL, ##<< If defined, the c2c will be filtered for the part of the genome below upper_bound.
config=NULL ##<< GLOBAL config variable
) {
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
  # Load c2c file
  if (reverse) {
    c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
  } else {
    c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
  }
  c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
  # Filtering ungapped
  c2c = c2c[c2c$V6=="-",]
  # Reverse
  if (reverse) {
    tmp_col = c2c$V1
    c2c$V1 = c2c$V7
    c2c$V7 = tmp_col
    tmp_col = c2c$V2
    c2c$V2 = c2c$V9
    c2c$V9 = tmp_col
    tmp_col = c2c$V3
    c2c$V3 = c2c$V10
    c2c$V10 = tmp_col
  }
  if (!is.null(chr)) {
    if (strain1 == "BY") {
      chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
    } else if (strain1 == "RM") {
      chro_1 = paste("supercontig_1.",chr,sep="")
    } else if (strain1 == "YJM") {
      chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
    }
    c2c = c2c[c2c$V1 == chro_1,]
    if (!is.null(lower_bound)) {
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1]) > 0) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1]) > 0) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
    }
    if (!is.null(upper_bound)) {
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1]) > 0) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1]) > 0) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
    }
  }
  return(c2c)
# It returns the appropriate part of the c2c file.
}

translate_cur = structure(function(# Translate coords of a genome region.
### This function is used in the examples; usually you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please refer to the example.
roi, ##<< Original genome region of interest.
strain2, ##<< The strain in which you want the genome region of interest.
config=NULL, ##<< GLOBAL config variable
big_cur=NULL ##<< A region larger than roi, used to filter the c2c if needed.
) {
  strain1 = roi$strain_ref
  if (strain1 == strain2) {
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
    return(roi)
  }

  # Extract c2c file
  if (!is.null(big_cur)) {
    # Dealing with big_cur
    if (roi$strain_ref != big_cur$strain_ref) {
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
    }
    if (big_cur$end < big_cur$begin) {
      tmp_var = big_cur$begin
      big_cur$begin = big_cur$end
      big_cur$end = tmp_var
      big_cur$length = big_cur$end - big_cur$begin + 1
    }
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
      print("WARNING! Trying to translate a roi not included in a big_cur.")
      return(NULL)
    }
    c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
  } else {
    # No big_cur
    c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)
  }

  # Convert initial roi$chr into c2c format
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
  begin_1 = roi$begin
  end_1 = roi$end
  if (reverse) {
    tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
  } else {
    tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
    tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
  }

  # Conditions that should never happen ...
  {
    if (length(tmptransfostart$V8) == 0) {
      # begin_1 is between two lines: shift begin_1 to the start of the 2nd line.
      if (sum(c2c$V3 >= c2c$V2) != 0) {
        tmp_c2c = c2c[c2c$V2>=begin_1,]
        begin_1 = min(tmp_c2c$V2)
      } else {
        tmp_c2c = c2c[c2c$V3>=begin_1,]
        begin_1 = min(tmp_c2c$V3)
      }
      if (reverse) {
        tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
      } else {
        tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
      }
      if (length(tmptransfostart$V8) == 0) {
        if (!is.null(big_cur)) {
          return(NULL)
          tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
        } else {
          print(tmptransfostart)
          print(tmptransfostop)
          stop("Never happend condition 1.")
        }
      }
    }
    if (length(tmptransfostop$V8) == 0) {
      # end_1 is between two lines: shift end_1 to the end of the 2nd line.
      if (sum(c2c$V3 >= c2c$V2) != 0) {
        tmp_c2c = c2c[c2c$V3<=end_1,]
        end_1 = max(tmp_c2c$V3)
      } else {
        tmp_c2c = c2c[c2c$V2<=end_1,]
        end_1 = max(tmp_c2c$V2)
      }
      if (reverse) {
        tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
      } else {
        tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
      }
      if (length(tmptransfostop$V8) == 0) {
        if (!is.null(big_cur)) {
          return(NULL)
          tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
        } else {
          print(tmptransfostart)
          print(tmptransfostop)
          stop("Never happend condition 2.")
        }
      }
    }
    if (length(tmptransfostart$V8) != 1) {
      # print("many start")
      tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
      if (length(tmptransfostart$V8) != 1) {
        print(tmptransfostart)
        print(tmptransfostop)
        stop("Never happend condition 3.")
      }
    }
    if (length(tmptransfostop$V8) != 1) {
      # print("many stop")
      tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
      if (length(tmptransfostop$V8) != 1) {
        print(tmptransfostart)
        print(tmptransfostop)
        stop("Never happend condition 4.")
      }
    }
    if (tmptransfostart$V7 != tmptransfostop$V7) {
      print(tmptransfostart)
      print(tmptransfostop)
      stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
    }
  }

  # Deal with strand
  if (tmptransfostart$V8 == 1) {
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
  } else {
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
  }
  # Build returned roi
  roi$strain_ref = strain2
  if (roi$strain_ref == "BY") {
    roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
  } else {
    roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
  }
  roi$begin = begin_2
  roi$end = end_2
  if (sign(roi$end - roi$begin) == 0) {
    roi$length = 1
  } else {
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
  }
  return(roi)
}, ex=function(){
  # Define new translate_cur function...
  translate_cur = function(roi, strain2, config) {
    strain1 = roi$strain_ref
    if (strain1 == strain2) {
      return(roi)
    } else {
      stop("Here is my new translate_cur function...")
    }
  }
  # Bind it by uncommenting the following lines.
  # unlockBinding("translate_cur", as.environment("package:nm"))
  # unlockBinding("translate_cur", getNamespace("nm"))
  # assign("translate_cur", translate_cur, "package:nm")
  # assign("translate_cur", translate_cur, getNamespace("nm"))
  # lockBinding("translate_cur", getNamespace("nm"))
  # lockBinding("translate_cur", as.environment("package:nm"))
})

    
1389

    
1390
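
# A minimal calling sketch (not run): assuming a config loaded as in the
# build_replicates example later in this file, a BY region can be translated
# into RM coordinates. The chromosome and coordinates below are placeholders.
# roi = list(chr="8", begin=472000, end=474000, strain_ref="BY")
# roi_rm = translate_cur(roi, "RM", config=config)
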
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
### CURs are regions that can be aligned between the genomes.
diff_allowed = 30, ##<< The maximum indel width allowed in a CUR.
min_cur_width = 4000, ##<< The minimum width of a CUR.
config = NULL ##<< GLOBAL config variable
) {

  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
    check_homogeneity(c2c)
    if (length(c2c[,1]) == 0) {
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
    } else {
      lower_bounds = apply(t(1:nrow(c2c)), 2, function(i){l = c2c[i,]; min(l$V2, l$V3)})
      upper_bounds = apply(t(1:nrow(c2c)), 2, function(i){l = c2c[i,]; max(l$V2, l$V3)})
      tmp_index = order(lower_bounds)
      lower_bounds = lower_bounds[tmp_index]
      upper_bounds = upper_bounds[tmp_index]
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
      ov_index = which(tmp_diff < 0)
      if (length(ov_index) != 0) {
        ov_index = ov_index[1]
        print(paste("WARNING! Overlapping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
        print(c2c_corrupted)
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
      }
      return(NULL)
    }
  }

  check_homogeneity = function(sub_c2c) {
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
    tmp_signs = tmp_signs[tmp_signs != 0]
    if (sum(tmp_signs[1] != tmp_signs)) {
      print(paste("*************** ERROR, non homogeneous region (sign)! ********************"))
      print(tmp_signs)
    }
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
    if (sum(tmp_signs2[1] != tmp_signs2)) {
      print(paste("*************** ERROR, non homogeneous region (sign2)! ********************"))
      print(tmp_signs2)
    }
    if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
      print("*************** ERROR, non homogeneous region chrs or V8! ********************")
    }
  }

  test_and_squeeze_rois = function(foo, config=NULL) {
    is_it_ok = function(list1, list2) {
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
      if (!ok) {
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
      }
      return (ok)
    }
    squeeze_rois = function(list1, list2) {
      rois = apply(t(1:nrow(list1)), 2, function(i){
        roi = list1[i,]
        roi2 = list2[i,]
        roi$begin = max(roi$begin, roi2$begin)
        roi$end = min(roi$end, roi2$end)
        roi$length = roi$end - roi$begin + 1
        return(roi)
      })
      return(do.call(rbind, rois))
    }
    # foo_orig = compute_inter_all_strain_curs2(config=config)
    # foo = foo_orig
    STOP = FALSE
    nb_round = 0
    while(!STOP) {
      nb_round = nb_round + 1
      print(paste("2-2 round #", nb_round, sep=""))
      fooby = translate_curs(foo, "BY", config=config)
      fooyjm = translate_curs(foo, "YJM", config=config)
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
      if (!is_it_ok(fooby, fooyjmby)) {
        print("case 1")
        foo = squeeze_rois(fooby, fooyjmby)
        next
      }
      foorm = translate_curs(foo, "RM", config=config)
      foormby = translate_curs(foorm, "BY", config=config)
      if (!is_it_ok(fooby, foormby)) {
        print("case 2")
        foo = squeeze_rois(fooby, foormby)
        next
      }
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
        print("case 3")
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
        next
      }
      foormyjm = translate_curs(foorm, "YJM", config=config)
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
      if (!is_it_ok(foorm, foormyjmrm)) {
        print("case 4")
        foo = squeeze_rois(foorm, foormyjmrm)
        next
      }
      foo = translate_curs(foo, "BY", config=config)
      STOP = TRUE
    }
    STOP = FALSE
    nb_round = 0
    while(!STOP) {
      nb_round = nb_round + 1
      print(paste("3-3 round #", nb_round, sep=""))
      fooby = translate_curs(foo, "BY", config=config)
      foobyrm = translate_curs(fooby, "RM", config=config)
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
      if (!is_it_ok(fooby, foobyrmyjmby)) {
        print("case 1")
        foo = squeeze_rois(fooby, foobyrmyjmby)
      }
      fooby = translate_curs(foo, "BY", config=config)
      foobyyjm = translate_curs(fooby, "YJM", config=config)
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
      if (!is_it_ok(fooby, foobyyjmrmby)) {
        print("case 2")
        foo = squeeze_rois(fooby, foobyyjmrmby)
        next
      }
      foo = translate_curs(foo, "BY", config=config)
      STOP = TRUE
    }
    print("end")
    return(foo)
  }

  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
    c2c = c2c_extraction(strain1, strain2, config=config)
    # computing diffs
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
    # Filtering
    indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
    indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) - 1))
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
      if (i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
      returned_rois = NULL
      start = indexes_start[i]
      stop = indexes_stop[i]
      sub_c2c = c2c[start:stop,]
      check_homogeneity(sub_c2c)
      if (strain1 == "BY") {
        chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
      } else {
        chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
      }
      roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
      roi$length = roi$end - roi$begin + 1
      if (roi$length >= min_cur_width) {
        lower_bound = roi$begin
        upper_bound = roi$end
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
        while(!is.null(check)) {
          # print(check)
          roi1 = roi
          roi1$end = check[[1]]
          roi1$length = roi1$end - roi1$begin + 1
          if (roi1$length >= min_cur_width) {
            returned_rois = dfadd(returned_rois, roi1)
          }
          roi$begin = check[[2]]
          roi$length = roi$end - roi$begin + 1
          if (roi$length >= min_cur_width) {
            lower_bound = min(roi$begin, roi$end)
            upper_bound = max(roi$begin, roi$end)
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
          } else {
            check = NULL
            roi = NULL
          }
        }
        returned_rois = dfadd(returned_rois, roi)
      }
    })
    rois = do.call(rbind, rois)
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
    return(rois)
  }

  translate_curs = function(rois, target_strain, config) {
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
      roi = rois[i,]
      tr_roi = translate_cur(roi, target_strain, config=config)
      tmp_begin = min(tr_roi$begin, tr_roi$end)
      tmp_end = max(tr_roi$begin, tr_roi$end)
      tr_roi$begin = tmp_begin
      tr_roi$end = tmp_end
      tr_roi$length = tr_roi$end - tr_roi$begin + 1
      return(tr_roi)
    })
    tr_rois = do.call(rbind, tr_rois)
    return(tr_rois)
  }

  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
  rois = list()
  for (combi in combis) {
    strain1 = combi[1]
    strain2 = combi[2]
    print(paste(strain1, strain2))
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
    strain1 = combi[2]
    strain2 = combi[1]
    print(paste(strain1, strain2))
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)
    region1 = rois_fwd
    region2 = tr_rois_rev
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
  }
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
  return (squeezed_rois)
### Returns the computed CURs (expressed in BY coordinates).
}
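
# A minimal calling sketch (not run): with a config loaded as in the
# build_replicates example later in this file, the CURs shared by the three
# genomes can be computed with the default thresholds.
# curs = compute_inter_all_strain_curs(diff_allowed=30, min_cur_width=4000, config=config)
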
intersect_region = function(# Returns the intersection of 2 lists of regions.
### For each region of region1, this function returns its intersection with the overlapping region(s) of region2.
region1, ##<< Original regions.
region2 ##<< Regions to intersect.
) {
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
    roi1 = region1[i, ]
    sub_regions2 = region2[region2$chr == roi1$chr, ]
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
                                sub_regions2$begin < roi1$begin & roi1$end < sub_regions2$end
                                , ]
    if (nrow(sub_regions2) == 0) {
      print("removing a region")
      return(NULL)
    } else if (nrow(sub_regions2) > 1) {
      print("more than one region in intersect_region")
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
    } else {
      roi2 = sub_regions2[1,]
      if (roi1$begin < roi2$begin) {
        print("not the same begin")
        roi1$begin = roi2$begin
        roi1$length = roi1$end - roi1$begin + 1
      }
      if (roi1$end > roi2$end) {
        print("not the same end")
        roi1$end = roi2$end
        roi1$length = roi1$end - roi1$begin + 1
      }
      return(roi1)
    }
  })
  return(do.call(rbind, intersection))
### Returns the intersected regions.
}
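
# A minimal sketch (not run) of what intersect_region computes, using two
# hypothetical single-region tables:
# r1 = data.frame(chr="1", begin=100, end=500, strain_ref="BY", length=401, stringsAsFactors=FALSE)
# r2 = data.frame(chr="1", begin=300, end=800, strain_ref="BY", length=501, stringsAsFactors=FALSE)
# intersect_region(r1, r2)   # expected: the region 300..500
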
build_replicates = structure(function(# Stage replicates data
### This function loads into memory the data corresponding to the given experiments.
expe, ##<< A list of vectors, each vector containing the sample ids of one set of replicates.
roi, ##<< The region that we are interested in.
only_fetch=FALSE, ##<< If TRUE, do not filter the inputs.
get_genome=FALSE, ##<< Load (or not) the corresponding genome.
all_samples, ##<< Global list of samples.
config=NULL ##<< GLOBAL config variable.
) {
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
    samples = list()
    for (i in samples_ids) {
      sample = as.list(all_samples[all_samples$id==i,])
      sample$orig_roi = roi
      sample$roi = translate_cur(roi, sample$strain, config = config)
      if (get_genome) {
        # Get Genome
        fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
        sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
      }
      # Get inputs
      sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
      sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
      sample$total_reads = sum(sample$inputs[,4])
      if (!only_fetch) {
        sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
      }
      # Get TF outputs for Mnase_Seq samples
      if (sample$marker == "Mnase_Seq" & get_ouputs) {
        sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
        sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
        if (!only_fetch) {
          sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
        }
      }
      samples[[length(samples) + 1]] = sample
    }
    return(samples)
  }
  replicates = list()
  for (samples_ids in expe) {
    samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
    replicates[[length(replicates) + 1]] = samples
  }
  return(replicates)
### Returns a list of replicates; each replicate is a list of staged samples.
}, ex = function() {
  # library(rjson)
  # library(nucleominer)
  #
  # # Read config file
  # json_conf_file = "nucleo_miner_config.json"
  # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
  # # Read sample file
  # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
  # # here are the sample ids in a list
  # expes = list(c(1))
  # # here is the region where we want to see the coverage
  # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
  # # it displays the coverage
  # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
  # out = watch_samples(replicates, config$READ_LENGTH,
  #       plot_coverage = TRUE,
  #       plot_squared_reads = FALSE,
  #       plot_ref_genome = FALSE,
  #       plot_arrow_raw_reads = FALSE,
  #       plot_arrow_nuc_reads = FALSE,
  #       plot_gaussian_reads = FALSE,
  #       plot_gaussian_unified_reads = FALSE,
  #       plot_ellipse_nucs = FALSE,
  #       plot_wp_nucs = FALSE,
  #       plot_wp_nuc_model = FALSE,
  #       plot_common_nucs = FALSE,
  #       height = 50)
})
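
# A sketch (not run) of how the returned structure can be inspected, assuming
# `replicates` was built as in the example above:
# s = replicates[[1]][[1]]     # first sample of the first replicate group
# s$total_reads                # total number of aligned reads for this sample
# head(s$inputs)               # reads falling into the (translated) roi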

    
watch_samples = function(# Watching analysis of samples
### This function allows you to view the analysis for a particular region of the genome.
replicates, ##<< Replicates, as built by build_replicates.
read_length, ##<< Length of the reads.
plot_ref_genome = TRUE, ##<< Plot (or not) the reference genome.
plot_arrow_raw_reads = TRUE, ##<< Plot (or not) arrows for raw reads.
plot_arrow_nuc_reads = TRUE, ##<< Plot (or not) arrows for reads associated with a nucleosome.
plot_squared_reads = TRUE, ##<< Plot (or not) reads in the squared fashion.
plot_coverage = FALSE, ##<< Plot (or not) reads in the coverage fashion.
plot_gaussian_reads = TRUE, ##<< Plot (or not) the Gaussian model of the F and R reads.
plot_gaussian_unified_reads = TRUE, ##<< Plot (or not) the Gaussian model of a nucleosome.
plot_ellipse_nucs = TRUE, ##<< Plot (or not) an ellipse for each nucleosome.
change_col = TRUE, ##<< Change the color of each nucleosome.
plot_wp_nucs = TRUE, ##<< Plot (or not) clusters of nucleosomes.
plot_fuzzy_nucs = TRUE, ##<< Plot (or not) clusters of fuzzy nucleosomes.
plot_wp_nuc_model = TRUE, ##<< Plot (or not) the Gaussian model for a cluster of nucleosomes.
plot_common_nucs = FALSE, ##<< Plot (or not) aligned (common) nucleosomes.
plot_common_unrs = FALSE, ##<< Plot (or not) unaligned nucleosomal regions (UNRs).
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non-MNase-Seq samples.
plot_chain = FALSE, ##<< Plot (or not) the chain of clustered nucleosomes between MNase samples.
plot_sample_id = FALSE, ##<< Plot (or not) the sample id for each sample.
aggregated_intra_strain_nucs = NULL, ##<< List of aggregated intra-strain nucs. If NULL, it will be computed.
aligned_inter_strain_nucs = NULL, ##<< List of aligned inter-strain nucs. If NULL, it will be computed.
height = 10, ##<< Number of reads (per million reads) for each sample; graphical parameter for the y axis.
main=NULL, ##<< Main title of the produced plot.
xlab=NULL, ##<< xlab of the produced plot.
ylab="#reads (per million reads)", ##<< ylab of the produced plot.
config=NULL ##<< GLOBAL config variable
){
  returned_list = list()
  # Computing global display parameters
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
    x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
    x_max_glo = replicates[[1]][[1]]$roi[["end"]]
  } else {
    x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
    x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
  }
  base_glo = 0
  nb_rank_glo = 0
  for (samples in replicates) {
    nb_rank_glo = nb_rank_glo + length(samples)
  }
  ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
  y_min_glo = min(ylim_glo)
  y_max_glo = max(ylim_glo)
  delta_y_glo = y_max_glo - y_min_glo
  # Plot main frame
  if (is.null(xlab)) {
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
  }
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main)
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
  # Go
  replicates_wp_nucs = list()
  wp_maps = list()
  fuzzy_maps = list()
  for (replicate_rank in 1:length(replicates)) {
    # Computing replicate parameters
    samples = replicates[[replicate_rank]]
    nb_rank = length(samples)
    base = (replicate_rank-1) * height * nb_rank
    ylim = c(base, base + height * nb_rank)
    y_min = min(ylim)
    y_max = max(ylim)
    delta_y = y_max - y_min
    for (sample_rank in 1:length(samples)) {
      # computing sample parameters
      sample = samples[[sample_rank]]
      y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
      if (plot_sample_id) {
        text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
      }

      if (sample$roi[["begin"]] < sample$roi[["end"]]) {
        x_min = sample$roi[["begin"]]
        x_max = sample$roi[["end"]]
      } else {
        x_min = - sample$roi[["begin"]]
        x_max = - sample$roi[["end"]]
      }
      shift = x_min_glo - x_min
      # Plot Genome seq
      if (plot_ref_genome) {
        text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
      }
      # Plot reads
      reads = sample$inputs
      signs = sign_from_strand(reads[,3])
      if (plot_arrow_raw_reads) {
        arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
        col=1, length=0.15/nb_rank)
      }
      if (plot_squared_reads) {
        # require(plotrix)
        rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift, y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1), border=0)
      }
      if (plot_coverage) {
        if (length(reads[,1]) != 0) {
          # Build the coverage profile as a step function: each read adds a step
          # up at its 5' end and a step down 150 bp further; the steps are then
          # cumulated and scaled to reads per million.
          step_h = sign(x_min) * signs * reads[,4]
          step_b = sign(x_min) * reads[,2] + shift
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
          steps_x = min(step_b, step_e):max(step_b, step_e)
          steps_y = rep(0, length(steps_x))
          for (i in 1:length(step_h)) {
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] = steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] = steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
          }
          tmp_index = which(steps_y != 0)
          steps_x = steps_x[tmp_index]
          steps_y = steps_y[tmp_index]
          tmp_current_level = 0
          for (i in 1:length(steps_y)) {
            steps_y[i] = tmp_current_level + steps_y[i]
            tmp_current_level = steps_y[i]
          }
          steps_y = c(0, steps_y)
          steps_y = steps_y * 1000000/sample$total_reads
        } else {
          steps_y = c(0, 0, 0)
          steps_x = c(x_min, x_max)
        }
        # print(steps_x)
        # print(steps_y)
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
        abline(y_lev, 0)
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
      }
      # Plot nucs
      if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
        nucs = sample$outputs
        if (length(nucs$center) > 0) {
          col = 1
          for (i in 1:length(nucs$center)) {
            foo <<- nucs  # debug: expose the current nucs in the global environment
            if (change_col) {
              col = col + 1
            } else {
              col = "blue"
            }
            nuc = nucs[i,]
            involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
            involved_signs = apply(t(involved_reads[,3]), 2, function(strand) { if (strand == "F") return(1) else return(-1)})
            total_involved_reads = sum(involved_reads[,4])
            if (plot_arrow_nuc_reads) {
              arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
              col=col, length=0.15/nb_rank)
            }
            if (plot_gaussian_reads | plot_gaussian_unified_reads) {
              flatted_reads = flat_reads(involved_reads, nuc$width)
              delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
            }
            if (plot_gaussian_reads) {
              flatted_reads = flat_reads(involved_reads, nuc$width)
              delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
            }
            if (plot_gaussian_unified_reads) {
              lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
            }
            if (plot_ellipse_nucs) {
              # require(plotrix)
              draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
            }
          }
        } else {
          print("WARNING! No nucs to print.")
        }
      }
    }

    # Plot wp nucs
    if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
      if (samples[[1]]$marker == "Mnase_Seq") {
        if (is.null(aggregated_intra_strain_nucs)) {
          wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
        } else {
          wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
        }
      } else {
        wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
      }
      if (plot_chain) {
        # Chain the clustered nucleosomes across the MNase tracks and annotate
        # each link with its LOD (llr) score.
        tf_nucs = lapply(wp_nucs, function(nuc) {
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
            tmp_nuc = tmp_nuc[[1]]
            tmp_nuc$inputs = NULL
            tmp_nuc$original_reads = NULL
            tmp_nuc$wp = nuc$wp
            # print(tmp_nuc)
            return(tmp_nuc)
          })
          return(do.call(rbind, bar))
        })
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
        tmp_y = y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
        tmp_y_prev = tmp_y[-length(tmp_y)]
        tmp_y_next = tmp_y[-1]
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
        tmp_track = unlist(tf_nucs$track)
        tmp_track_prev = tmp_track[-length(tmp_track)]
        tmp_track_next = tmp_track[-1]
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
        if (is.null(config$TRACK_LOD_OFFSET)) {
          config$TRACK_LOD_OFFSET = 0
        }
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
        tmp_x_prev = tmp_x[-length(tmp_x)]
        tmp_x_next = tmp_x[-1]
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
        tmp_llr_inter = signif(unlist(tf_nucs$llr_score)[-1], 2)
        new_tmp_x = c()
        new_tmp_y = c()
        index_odd = 1:length(tmp_x) * 2 - 1
        index_even = (1:(length(tmp_x) - 1)) * 2
        new_tmp_x[index_odd] = tmp_x
        new_tmp_y[index_odd] = tmp_y
        new_tmp_x[index_even] = tmp_x_inter
        new_tmp_y[index_even] = tmp_y_inter
        lines(new_tmp_x, new_tmp_y, lwd=2)
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
        points(tmp_x, tmp_y, cex=4, lwd=2)
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
        if (is.null(config$LEGEND_LOD_POS)) {
          pos = 2
        } else {
          pos = config$LEGEND_LOD_POS
        }
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20) return("green") else return("red")})
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr)
      }

      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs) {
        replicates_wp_nucs[[replicate_rank]] = wp_nucs
        strain = samples[[1]]$strain
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)

        if (plot_fuzzy_nucs) {
          fuzzy_map = fuzzy_maps[[strain]]
          if (!is.null(fuzzy_map)) {
            if (nrow(fuzzy_map) > 0) {
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)
            }
          }
        }

        if (plot_wp_nucs) {
          for (wp_nuc in wp_nucs) {
            if (wp_nuc$wp) {
              rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
              if (plot_wp_nuc_model) {
                all_original_reads = c()
                for (initial_nuc in wp_nuc$nucs) {
                  all_original_reads = c(all_original_reads, initial_nuc$original_reads)
                }
                delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
                lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
              }
            }
          }
        }
      }
    }
  }

  if (plot_common_nucs) {
    if (is.null(aligned_inter_strain_nucs)) {
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
      if (!is.null(aligned_inter_strain_nucs)) {
        aligned_inter_strain_nucs$cur_index = "foo"
      }
    }

    # Plot common wp nucs
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = y_min = y_max = delta_y = list()
    for (replicate_rank in 1:length(replicates)) {
      samples = replicates[[replicate_rank]]
      nb_rank[[replicate_rank]] = length(samples)
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2

      for (sample_rank in 1:length(samples)) {
        sample = samples[[sample_rank]]
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
          x_min[[replicate_rank]] = sample$roi[["begin"]]
          x_max[[replicate_rank]] = sample$roi[["end"]]
        } else {
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
          x_max[[replicate_rank]] = - sample$roi[["end"]]
        }
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
      }
    }
    print(aligned_inter_strain_nucs)
    if (!is.null(aligned_inter_strain_nucs)) {
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
        tmp_xs = tmp_ys = c()
        for (replicate_rank in 1:length(replicates)) {
          samples = replicates[[replicate_rank]]
          strain = samples[[1]]$strain
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
        }
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
      }
    }

    if (plot_common_unrs) {
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
      roi = as.list(samples[[1]]$roi)
      cur_index = "foo"
      common_nuc_results = list()
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config)
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))
    }

  }
  return(returned_list)
### Returns a list of coverage step functions, one "cov_<sample id>" entry per sample, filled when plot_coverage is TRUE.
}
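
# A sketch (not run) of how the value returned by watch_samples can be reused,
# assuming it was called with plot_coverage=TRUE as in the build_replicates
# example above; sample id 1 and the position are placeholders.
# out = watch_samples(replicates, config$READ_LENGTH, plot_coverage=TRUE, height=50)
# cov_fun = out[["cov_1"]]   # coverage as a stepfun, in reads per million
# cov_fun(473000)            # coverage at a given position
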
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
### This function grabs the nucleosomes detected by template_filter that have been rejected by aggregate_intra_strain_nucs as well-positioned nucleosomes.
wp_map, ##<< Well-positioned nucleosomes map.
roi, ##<< The region of interest.
strain, ##<< The strain for which we want to extract the fuzzy map.
config=NULL ##<< GLOBAL config variable.
) {
  fuzzy_map = wp_map[wp_map$wp==0, ]
  if (nrow(fuzzy_map) > 0) {
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
    if (!is.null(fuzzy_map)) {
      fuzzy_map = union_regions(fuzzy_map)
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
    }
  }
  return(fuzzy_map)
### Returns the fuzzy nucleosome map for the given strain.
}
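
# A sketch (not run), assuming wp_map was obtained as in watch_samples, i.e.
# via flat_aggregated_intra_strain_nucs on the aggregated intra-strain nucs:
# wp_map = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
# fuzzy_map = get_intra_strain_fuzzy(wp_map, as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
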
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
### This function aggregates, for each strain, the wp nucs that are not common and subtracts the common wp nucs. It does not take care of the size of the resulting UNRs; this is taken into account in the read-counting part of the pipeline.
combi, ##<< The strain combination to consider.
roi, ##<< The region of interest.
cur_index, ##<< The region of interest index.
wp_maps, ##<< Well-positioned nucleosome maps.
fuzzy_maps, ##<< Fuzzy nucleosome maps.
common_nuc_results, ##<< Common wp nuc maps.
config=NULL ##<< GLOBAL config variable
) {
  # print(cur_index)

  tmp_combi_key = paste(combi[1], combi[2], sep="_")
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]

  # print(paste("Dealing with unr from", combi[1]))
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
  tmp_wp = wp_maps[[combi[1]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Keep the wp nucs of combi[1] that are not aligned with a nuc of combi[2]
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
      return(NULL)
    } else {
      return (index_nuc)
    }
  }))
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  tmp_unr = rbind(tmp_fuzzy, tmp_unaligned_wp[,1:4])
  if (length(tmp_unr) != 0) {
    tmp_unr = union_regions(tmp_unr)
  }
  tmp_unr_nucs_1 = tmp_unr
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
  agg_unr_1 = tmp_unr_nucs_1

  # print(paste("Dealing with unr from", combi[2]))
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
  tmp_wp = wp_maps[[combi[2]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Keep the wp nucs of combi[2] that are not aligned with a nuc of combi[1]
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
      return(NULL)
    } else {
      return (index_nuc)
    }
  }))
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  tmp_unr = rbind(tmp_fuzzy, tmp_unaligned_wp[,1:4])
  if (length(tmp_unr) != 0) {
    tmp_unr = union_regions(tmp_unr)
  }
  tmp_unr_nucs_2 = tmp_unr
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)

  # print("Dealing with unr from both...")
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))

  # print(paste("Dealing with wp from", combi[1]))
  tmp_wp = wp_maps[[combi[1]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Keep the wp nucs of combi[1] that are aligned with a nuc of combi[2]
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
      return (index_nuc)
    } else {
      return(NULL)
    }
  }))
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]

  # print(paste("Dealing with wp from", combi[2]))
  tmp_wp = wp_maps[[combi[2]]]
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
  # Keep the wp nucs of combi[2] that are aligned with a nuc of combi[1]
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
      return (index_nuc)
    } else {
      return(NULL)
    }
  }))
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
  if (nrow(wp_nucs_2) == 0) {
    tr_wp_nucs_2 = wp_nucs_2
  } else {
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)
  }

  # print("Dealing with wp from both...")
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))

  # print("Dealing with unr and wp...")
  non_inter_unr = substract_region(all_unr, all_wp)
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
  if (is.null(non_inter_unr)) { return(NULL) }
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
  min_unr_width = 75
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]

  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
  return (non_inter_unr)
### Returns the UNRs, expressed in combi[1] coordinates.
}
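
# A sketch (not run), mirroring the way watch_samples calls get_unrs; "foo" is
# the placeholder cur_index used there and the maps come from the wp/fuzzy
# computations shown above:
# combi = c("BY", "RM")
# common_nuc_results = list(BY_RM = aligned_inter_strain_nucs)
# unrs = get_unrs(combi, roi, "foo", wp_maps, fuzzy_maps, common_nuc_results, config=config)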