Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ d973538c

Historique | Voir | Annoter | Télécharger (85,82 ko)

1 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
2 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
3 55c1cdff Florent Chuffart
##author<< Gael Yvert,
4 6b5a528c Florent Chuffart
(
5 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
6 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
7 6b5a528c Florent Chuffart
) {
8 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
9 6b5a528c Florent Chuffart
  N = length(x)
10 6b5a528c Florent Chuffart
  i = 1;
11 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
12 6b5a528c Florent Chuffart
  if (i == 1)
13 6b5a528c Florent Chuffart
    return (NA)
14 6b5a528c Florent Chuffart
  else
15 6b5a528c Florent Chuffart
    return( x[i-1] )
16 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
17 6b5a528c Florent Chuffart
}, ex=function(){
18 6b5a528c Florent Chuffart
  print("example")
19 6b5a528c Florent Chuffart
})
20 6b5a528c Florent Chuffart
21 6b5a528c Florent Chuffart
22 6b5a528c Florent Chuffart
23 7e2d37e1 Florent Chuffart
llr_score_nvecs = structure(function # Likelihood ratio
24 7e2d37e1 Florent Chuffart
### Compute the log likelihood ratio of two or more set of value.
25 6b5a528c Florent Chuffart
(
26 7e2d37e1 Florent Chuffart
  xs ##<< list of vectors.
27 6b5a528c Florent Chuffart
) {
28 7e2d37e1 Florent Chuffart
  l = length(xs)
29 7e2d37e1 Florent Chuffart
  if (l < 1) {
30 7e2d37e1 Florent Chuffart
    return(NA)
31 7e2d37e1 Florent Chuffart
  }
32 7e2d37e1 Florent Chuffart
  if (l == 1) {
33 7e2d37e1 Florent Chuffart
    return(1)
34 7e2d37e1 Florent Chuffart
  }
35 7e2d37e1 Florent Chuffart
  sumllX = 0
36 7e2d37e1 Florent Chuffart
  for (i in 1:l) {
37 7e2d37e1 Florent Chuffart
    x = xs[[i]]
38 7e2d37e1 Florent Chuffart
  	if (length(x) <= 1) {
39 7e2d37e1 Florent Chuffart
  		return(NA)
40 7e2d37e1 Florent Chuffart
  	}
41 7e2d37e1 Florent Chuffart
    meanX = mean(x)
42 7e2d37e1 Florent Chuffart
    sdX = sd(x)
43 7e2d37e1 Florent Chuffart
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
44 7e2d37e1 Florent Chuffart
    sumllX = sumllX + llX
45 7e2d37e1 Florent Chuffart
  }
46 7e2d37e1 Florent Chuffart
  meanXglo = mean(unlist(xs))
47 7e2d37e1 Florent Chuffart
  sdXglo = sd(unlist(xs))
48 7e2d37e1 Florent Chuffart
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
49 7e2d37e1 Florent Chuffart
  ratio = sumllX - llXYZ
50 6b5a528c Florent Chuffart
  return(ratio)
51 7e2d37e1 Florent Chuffart
### Returns the log likelihood ratio.
52 6b5a528c Florent Chuffart
}, ex=function(){
53 e5603c3f Florent Chuffart
  # LLR score for 2 set of values
54 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
55 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
56 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
57 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
58 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
59 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
60 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
61 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
62 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
63 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
64 7e2d37e1 Florent Chuffart
  llr_score_nvecs(list(x1,x2))
65 6b5a528c Florent Chuffart
 })
66 6b5a528c Florent Chuffart
67 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
68 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
69 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
70 6b5a528c Florent Chuffart
		l ##<<  A list
71 6b5a528c Florent Chuffart
	) {
72 6b5a528c Florent Chuffart
  if (is.null(df)) {
73 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
74 6b5a528c Florent Chuffart
  } else {
75 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
76 6b5a528c Florent Chuffart
  }
77 6b5a528c Florent Chuffart
  return(df)
78 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
79 6b5a528c Florent Chuffart
}, ex=function(){
80 6b5a528c Florent Chuffart
		## Here dataframe is NULL
81 6b5a528c Florent Chuffart
		print(df)
82 6b5a528c Florent Chuffart
		df = NULL
83 5bfac5a3 Florent Chuffart
84 6b5a528c Florent Chuffart
		# Initialize df
85 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
86 6b5a528c Florent Chuffart
		print(df)
87 5bfac5a3 Florent Chuffart
88 6b5a528c Florent Chuffart
		# Adding elements to df
89 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
90 6b5a528c Florent Chuffart
		print(df)
91 6b5a528c Florent Chuffart
})
92 6b5a528c Florent Chuffart
93 6b5a528c Florent Chuffart
94 6b5a528c Florent Chuffart
sign_from_strand = function(
95 55c1cdff Florent Chuffart
### Get the sign of strand
96 6b5a528c Florent Chuffart
strands) {
97 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
98 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
99 6b5a528c Florent Chuffart
}
100 6b5a528c Florent Chuffart
101 6b5a528c Florent Chuffart
flat_reads = function(
102 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
103 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
104 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
105 6b5a528c Florent Chuffart
) {
106 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
107 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
108 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
109 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
110 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
111 6b5a528c Florent Chuffart
}
112 6b5a528c Florent Chuffart
113 6b5a528c Florent Chuffart
114 6b5a528c Florent Chuffart
115 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
116 ec2936ea Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold.
117 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
118 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
119 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
120 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
121 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
122 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
123 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
124 6b5a528c Florent Chuffart
) {
125 6b5a528c Florent Chuffart
  if (x_min < 0) {
126 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
127 6b5a528c Florent Chuffart
	} else {
128 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
129 6b5a528c Florent Chuffart
  }
130 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
131 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
132 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
133 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
134 6b5a528c Florent Chuffart
  i = 1
135 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
136 7e2d37e1 Florent Chuffart
    lb = tf_outputs[i,]$lower_bound
137 7e2d37e1 Florent Chuffart
    ub = tf_outputs[i,]$upper_bound
138 7e2d37e1 Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
139 6b5a528c Florent Chuffart
    i = i+1
140 6b5a528c Florent Chuffart
  }
141 6b5a528c Florent Chuffart
  return(tf_outputs)
142 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
143 6b5a528c Florent Chuffart
}
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
146 6b5a528c Florent Chuffart
147 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
148 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
149 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
150 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
151 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
152 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
153 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
154 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
155 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
156 d973538c Florent Chuffart
filter_for_coverage = FALSE, ##<< Does it filter for plot coverage?
157 d973538c Florent Chuffart
USE_DPLYR = TRUE ##<< Use dplyr lib to filter reads.
158 6b5a528c Florent Chuffart
) {
159 d973538c Florent Chuffart
  n = names(inputs)
160 d973538c Florent Chuffart
161 d973538c Florent Chuffart
  if (!USE_DPLYR) {  
162 d973538c Florent Chuffart
    if (only_f) {
163 d973538c Florent Chuffart
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
164 d973538c Florent Chuffart
    } else if (only_r) {
165 d973538c Florent Chuffart
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
166 d973538c Florent Chuffart
    } else {
167 d973538c Florent Chuffart
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
168 d973538c Florent Chuffart
    }
169 d973538c Florent Chuffart
  } else {
170 d973538c Florent Chuffart
    names(inputs) = c("chr", "pos", "str", "lev")
171 d973538c Florent Chuffart
    if (only_f) {
172 d973538c Florent Chuffart
      inputs_out = filter(inputs, chr == chr,  pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width)
173 d973538c Florent Chuffart
    } else if (only_r) {
174 d973538c Florent Chuffart
      inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width)
175 d973538c Florent Chuffart
    } else {
176 d973538c Florent Chuffart
      inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width)
177 d973538c Florent Chuffart
    }
178 d973538c Florent Chuffart
      # if (!filter_for_coverage) {
179 d973538c Florent Chuffart
      #   inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
180 d973538c Florent Chuffart
      #   inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max)
181 d973538c Florent Chuffart
      #   inputs$corrected_inputs_coords = NULL
182 d973538c Florent Chuffart
      # }  
183 d973538c Florent Chuffart
  }
184 d973538c Florent Chuffart
185 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
186 d973538c Florent Chuffart
    corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
187 d973538c Florent Chuffart
    inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
188 b8a95426 Florent Chuffart
  }
189 d973538c Florent Chuffart
190 d973538c Florent Chuffart
  names(inputs_out) = n 
191 d973538c Florent Chuffart
	return(inputs_out)
192 6b5a528c Florent Chuffart
### Returns filtred inputs.
193 6b5a528c Florent Chuffart
}
194 6b5a528c Florent Chuffart
195 6b5a528c Florent Chuffart
196 6b5a528c Florent Chuffart
197 6b5a528c Florent Chuffart
get_comp_strand = function(
198 6b5a528c Florent Chuffart
### Compute the complementatry strand.
199 6b5a528c Florent Chuffart
strand ##<< The original strand.
200 6b5a528c Florent Chuffart
) {
201 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
202 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
203 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
204 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
205 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
206 6b5a528c Florent Chuffart
	})
207 6b5a528c Florent Chuffart
### Returns the complementatry strand.
208 6b5a528c Florent Chuffart
}
209 6b5a528c Florent Chuffart
210 6b5a528c Florent Chuffart
211 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
212 5badc2fd Florent Chuffart
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
213 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
214 e5603c3f Florent Chuffart
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
215 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
216 6b5a528c Florent Chuffart
){
217 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
218 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
219 6b5a528c Florent Chuffart
			return(TRUE)
220 6b5a528c Florent Chuffart
		}
221 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
222 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
223 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
224 6b5a528c Florent Chuffart
	        return(FALSE)
225 6b5a528c Florent Chuffart
	      }
226 6b5a528c Florent Chuffart
	  	}
227 6b5a528c Florent Chuffart
	  }
228 6b5a528c Florent Chuffart
	  return(TRUE)
229 6b5a528c Florent Chuffart
	}
230 d973538c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
231 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
232 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
233 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
234 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
235 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
236 6b5a528c Florent Chuffart
				# print(new_cluster)
237 6b5a528c Florent Chuffart
		  }
238 6b5a528c Florent Chuffart
		} else {
239 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
240 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
241 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
242 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
243 6b5a528c Florent Chuffart
			}
244 6b5a528c Florent Chuffart
		}
245 6b5a528c Florent Chuffart
		return(clusters)
246 6b5a528c Florent Chuffart
	}
247 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
248 7e2d37e1 Florent Chuffart
	llr_scores = c()
249 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
250 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
251 6b5a528c Florent Chuffart
  # compute clusters
252 6b5a528c Florent Chuffart
  clusters = list()
253 6b5a528c Florent Chuffart
  cluster_contents = list()
254 6b5a528c Florent Chuffart
  # Init reader
255 6b5a528c Florent Chuffart
  indexes = c()
256 6b5a528c Florent Chuffart
  track_readers = c()
257 6b5a528c Florent Chuffart
  current_nuc = NULL
258 7e2d37e1 Florent Chuffart
	llr_score = llr_thres + 1
259 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
260 6b5a528c Florent Chuffart
  tf_outs = list()
261 6b5a528c Florent Chuffart
	i = 1
262 6b5a528c Florent Chuffart
  for (sample in samples) {
263 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
264 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
265 6b5a528c Florent Chuffart
    indexes[i] = 1
266 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
267 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
268 6b5a528c Florent Chuffart
	  } else {
269 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
270 6b5a528c Florent Chuffart
		}
271 5bfac5a3 Florent Chuffart
		i = i+1
272 6b5a528c Florent Chuffart
  }
273 d973538c Florent Chuffart
  nb_tracks = length(tf_outs)
274 6b5a528c Florent Chuffart
	# print(track_readers)
275 6b5a528c Florent Chuffart
  new_cluster = NULL
276 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
277 6b5a528c Florent Chuffart
  nuc_from_track = c()
278 d973538c Florent Chuffart
  for (i in 1:nb_tracks){
279 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
280 6b5a528c Florent Chuffart
  }
281 6b5a528c Florent Chuffart
  # Start clustering
282 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
283 6b5a528c Florent Chuffart
    new_center = min(track_readers)
284 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
285 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
286 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
287 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
288 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
289 6b5a528c Florent Chuffart
		new_nuc$track = current_track
290 5bfac5a3 Florent Chuffart
291 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
292 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
293 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
294 6b5a528c Florent Chuffart
295 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
296 6b5a528c Florent Chuffart
297 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
298 7e2d37e1 Florent Chuffart
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
299 7e2d37e1 Florent Chuffart
			llr_scores = c(llr_scores,llr_score)
300 6b5a528c Florent Chuffart
		}
301 7e2d37e1 Florent Chuffart
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
302 7e2d37e1 Florent Chuffart
		if (is.na(llr_score)) {
303 7e2d37e1 Florent Chuffart
			llr_score = llr_thres + 1
304 6b5a528c Florent Chuffart
		}
305 7e2d37e1 Florent Chuffart
		# Store llr_score
306 7e2d37e1 Florent Chuffart
		new_nuc$llr_score = llr_score
307 7e2d37e1 Florent Chuffart
	  if (llr_score < llr_thres) {
308 6b5a528c Florent Chuffart
      # aggregate to current cluster
309 6b5a528c Florent Chuffart
      #   update bound
310 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
311 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
312 6b5a528c Florent Chuffart
      }
313 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
314 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
315 6b5a528c Florent Chuffart
      }
316 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
317 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
318 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
319 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
320 6b5a528c Florent Chuffart
    } else {
321 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
322 6b5a528c Florent Chuffart
        # store old cluster
323 d973538c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
324 6b5a528c Florent Chuffart
			}
325 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
326 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
327 6b5a528c Florent Chuffart
      nuc_from_track = c()
328 d973538c Florent Chuffart
      for (i in 1:nb_tracks){
329 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
330 6b5a528c Florent Chuffart
      }
331 55c1cdff Florent Chuffart
      # create new cluster
332 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
333 6b5a528c Florent Chuffart
      # update upper bound
334 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
335 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
336 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
337 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
338 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
339 5bfac5a3 Florent Chuffart
340 6b5a528c Florent Chuffart
		}
341 5bfac5a3 Florent Chuffart
342 6b5a528c Florent Chuffart
		current_nuc = new_nuc
343 6b5a528c Florent Chuffart
344 6b5a528c Florent Chuffart
    # update indexes
345 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
346 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
347 6b5a528c Florent Chuffart
      # update track
348 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
349 6b5a528c Florent Chuffart
    } else {
350 6b5a528c Florent Chuffart
      # update track
351 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
352 6b5a528c Florent Chuffart
    }
353 6b5a528c Florent Chuffart
  }
354 6b5a528c Florent Chuffart
  # store last cluster
355 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
356 6b5a528c Florent Chuffart
    # store old cluster
357 d973538c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
358 6b5a528c Florent Chuffart
  }
359 7e2d37e1 Florent Chuffart
	return(list(clusters, llr_scores))
360 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
361 5bfac5a3 Florent Chuffart
}, ex=function(){
362 6b5a528c Florent Chuffart
	# Dealing with a region of interest
363 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
364 6b5a528c Florent Chuffart
	samples = list()
365 6b5a528c Florent Chuffart
	for (i in 1:3) {
366 6b5a528c Florent Chuffart
		# Create TF output
367 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
368 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
369 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
370 6b5a528c Florent Chuffart
		# Generate corresponding reads
371 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
372 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
373 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
374 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
375 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
376 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
377 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
378 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
379 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
380 55c1cdff Florent Chuffart
														 "V3" = strands,
381 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
382 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
383 6b5a528c Florent Chuffart
	}
384 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
385 6b5a528c Florent Chuffart
})
386 6b5a528c Florent Chuffart
387 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
388 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
389 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
390 d973538c Florent Chuffart
cur_index, ##<< the index of the roi involved
391 d973538c Florent Chuffart
nb_tracks=3 ##<< the number of replicates
392 7646593d Florent Chuffart
) {
393 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
394 4016229d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
395 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
396 7646593d Florent Chuffart
	} else {
397 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
398 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
399 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
400 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
401 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
402 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
403 4016229d Florent Chuffart
			tmp_nuc_as_list[["cur_index"]] = cur_index
404 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
405 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
406 7646593d Florent Chuffart
			all_original_reads = c()
407 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
408 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
409 7646593d Florent Chuffart
			}
410 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
411 0d5fbf02 Florent Chuffart
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
412 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
413 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
414 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5)          
415 d973538c Florent Chuffart
        }
416 7646593d Florent Chuffart
			} else {
417 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
418 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA
419 d973538c Florent Chuffart
        }
420 7646593d Florent Chuffart
			}
421 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
422 7646593d Florent Chuffart
    })
423 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
424 7646593d Florent Chuffart
	}
425 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
426 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
427 7646593d Florent Chuffart
}
428 6b5a528c Florent Chuffart
429 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
430 e5603c3f Florent Chuffart
### This function aligns nucleosomes between two strains for a given genome region.
431 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
432 e5603c3f Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
433 e5603c3f Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
434 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
435 e5603c3f Florent Chuffart
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
436 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
437 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
438 6b5a528c Florent Chuffart
) {
439 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
440 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
441 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
442 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
443 6b5a528c Florent Chuffart
	}
444 6b5a528c Florent Chuffart
	common_nuc = NULL
445 7e2d37e1 Florent Chuffart
	llr_scores = c()
446 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
447 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
448 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
449 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
450 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
451 4016229d Florent Chuffart
	big_cur = replicates[[1]][[1]]$roi
452 4016229d Florent Chuffart
  orig_big_cur = replicates[[1]][[1]]$orig_roi
453 4016229d Florent Chuffart
	if(big_cur$end - big_cur$begin < 0) {
454 4016229d Florent Chuffart
		tmp_begin = big_cur$begin
455 4016229d Florent Chuffart
		big_cur$begin =  big_cur$end
456 4016229d Florent Chuffart
		big_cur$end =  tmp_begin
457 6b5a528c Florent Chuffart
	}
458 6b5a528c Florent Chuffart
	# GO!
459 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
460 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
461 6b5a528c Florent Chuffart
	}
462 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
463 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
464 6b5a528c Florent Chuffart
  }
465 6b5a528c Florent Chuffart
	lws = c()
466 6b5a528c Florent Chuffart
	ups = c()
467 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
468 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
469 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
470 6b5a528c Florent Chuffart
	}
471 6b5a528c Florent Chuffart
472 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
473 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
474 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
475 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
476 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
477 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
478 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
479 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
480 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
481 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
482 4016229d Florent Chuffart
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
483 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
484 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
485 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
486 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
487 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
488 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
489 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
490 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
491 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
492 4016229d Florent Chuffart
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
493 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
494 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
495 6b5a528c Florent Chuffart
							SKIP = FALSE
496 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
497 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
498 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
499 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
500 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
501 6b5a528c Florent Chuffart
									SKIP = TRUE
502 6b5a528c Florent Chuffart
								}
503 6b5a528c Florent Chuffart
							}
504 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
505 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
506 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
507 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
508 6b5a528c Florent Chuffart
									SKIP = TRUE
509 6b5a528c Florent Chuffart
								}
510 6b5a528c Florent Chuffart
							}
511 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
512 6b5a528c Florent Chuffart
							if (!SKIP) {
513 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
514 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
515 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
516 7e2d37e1 Florent Chuffart
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
517 7e2d37e1 Florent Chuffart
								llr_scores = c(llr_scores, llr_score)
518 e5603c3f Florent Chuffart
								# Filtering on LLR Score
519 67157457 Florent Chuffart
                if (llr_score < llr_thres) {
520 6b5a528c Florent Chuffart
									tmp_nuc = list()
521 6b5a528c Florent Chuffart
									# strain_ref1
522 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
523 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
524 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
525 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
526 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
527 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
528 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
529 6b5a528c Florent Chuffart
									# strain_ref2
530 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
531 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
532 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
533 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
534 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
535 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
536 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
537 6b5a528c Florent Chuffart
									# common
538 7e2d37e1 Florent Chuffart
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
539 dadb6a4d Florent Chuffart
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
540 dadb6a4d Florent Chuffart
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
541 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
542 ec2936ea Florent Chuffart
                }
543 6b5a528c Florent Chuffart
							}
544 6b5a528c Florent Chuffart
						}
545 5bfac5a3 Florent Chuffart
					}
546 6b5a528c Florent Chuffart
				} else {
547 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
548 6b5a528c Florent Chuffart
			  }
549 6b5a528c Florent Chuffart
		  }
550 6b5a528c Florent Chuffart
		}
551 5bfac5a3 Florent Chuffart
552 67157457 Florent Chuffart
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
553 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
554 67157457 Florent Chuffart
      to_remove_list = c()
555 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
556 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
557 67157457 Florent Chuffart
          to_remove = index_redundant[i]
558 67157457 Florent Chuffart
        }   else {
559 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
560 67157457 Florent Chuffart
        }
561 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
562 67157457 Florent Chuffart
      }
563 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
564 67157457 Florent Chuffart
    }
565 5bfac5a3 Florent Chuffart
566 67157457 Florent Chuffart
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
567 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
568 67157457 Florent Chuffart
      to_remove_list = c()
569 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
570 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
571 67157457 Florent Chuffart
          to_remove = index_redundant[i]
572 67157457 Florent Chuffart
        }   else {
573 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
574 67157457 Florent Chuffart
        }
575 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
576 67157457 Florent Chuffart
      }
577 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
578 67157457 Florent Chuffart
    }
579 5bfac5a3 Florent Chuffart
580 7e2d37e1 Florent Chuffart
		return(list(common_nuc, llr_scores))
581 6b5a528c Florent Chuffart
	} else {
582 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
583 6b5a528c Florent Chuffart
		return(NULL)
584 6b5a528c Florent Chuffart
	}
585 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
586 5bfac5a3 Florent Chuffart
}, ex=function(){
587 1d833b97 Florent Chuffart
588 4016229d Florent Chuffart
    # Define new translate_cur function...
589 4016229d Florent Chuffart
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
590 1d833b97 Florent Chuffart
      return(roi)
591 1d833b97 Florent Chuffart
    }
592 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
593 4016229d Florent Chuffart
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
594 4016229d Florent Chuffart
    unlockBinding("translate_cur", getNamespace("nucleominer"))
595 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, "package:nucleominer")
596 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
597 4016229d Florent Chuffart
    lockBinding("translate_cur", getNamespace("nucleominer"))
598 4016229d Florent Chuffart
    lockBinding("translate_cur", as.environment("package:nucleominer"))
599 1d833b97 Florent Chuffart
600 6b5a528c Florent Chuffart
	# Dealing with a region of interest
601 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
602 4016229d Florent Chuffart
	roi2 = translate_cur(roi, roi$strain_ref1)
603 6b5a528c Florent Chuffart
	replicates = list()
604 6b5a528c Florent Chuffart
	for (j in 1:2) {
605 6b5a528c Florent Chuffart
		samples = list()
606 6b5a528c Florent Chuffart
		for (i in 1:3) {
607 6b5a528c Florent Chuffart
			# Create TF output
608 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
609 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
610 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
611 6b5a528c Florent Chuffart
			# Generate corresponding reads
612 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
613 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
614 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
615 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
616 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
617 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
618 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
619 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
620 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
621 55c1cdff Florent Chuffart
															 "V3" = strands,
622 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
623 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
624 6b5a528c Florent Chuffart
		}
625 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
626 6b5a528c Florent Chuffart
	}
627 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
628 6b5a528c Florent Chuffart
})
629 6b5a528c Florent Chuffart
630 6b5a528c Florent Chuffart
631 6b5a528c Florent Chuffart
632 6b5a528c Florent Chuffart
633 6b5a528c Florent Chuffart
634 6b5a528c Florent Chuffart
635 6b5a528c Florent Chuffart
636 6b5a528c Florent Chuffart
637 6b5a528c Florent Chuffart
638 6b5a528c Florent Chuffart
639 6b5a528c Florent Chuffart
640 6b5a528c Florent Chuffart
641 6b5a528c Florent Chuffart
642 6b5a528c Florent Chuffart
643 6b5a528c Florent Chuffart
644 6b5a528c Florent Chuffart
645 6b5a528c Florent Chuffart
646 6b5a528c Florent Chuffart
647 6b5a528c Florent Chuffart
648 6b5a528c Florent Chuffart
649 6b5a528c Florent Chuffart
650 6b5a528c Florent Chuffart
651 6b5a528c Florent Chuffart
652 6b5a528c Florent Chuffart
653 6b5a528c Florent Chuffart
654 6b5a528c Florent Chuffart
655 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
656 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
657 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
658 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
659 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
660 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
661 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
662 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
663 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
664 6b5a528c Florent Chuffart
) {
665 6b5a528c Florent Chuffart
	samples=list()
666 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
667 6b5a528c Florent Chuffart
	for (i in samples_ids) {
668 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
669 a0b91fee Florent Chuffart
    sample$orig_roi = roi
670 4016229d Florent Chuffart
    sample$roi = translate_cur(roi, sample$strain, config = config)
671 6b5a528c Florent Chuffart
		if (get_genome) {
672 6b5a528c Florent Chuffart
			# Get Genome
673 5bfac5a3 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
674 6b5a528c Florent Chuffart
		}
675 6b5a528c Florent Chuffart
		# Get inputs
676 55c1cdff Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
677 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
678 6b5a528c Florent Chuffart
		if (!only_fetch) {
679 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
680 6b5a528c Florent Chuffart
	  }
681 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
682 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
683 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
684 6b5a528c Florent Chuffart
			if (!only_fetch) {
685 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
686 6b5a528c Florent Chuffart
  		}
687 6b5a528c Florent Chuffart
		}
688 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
689 6b5a528c Florent Chuffart
	}
690 6b5a528c Florent Chuffart
  return(samples)
691 6b5a528c Florent Chuffart
}
692 6b5a528c Florent Chuffart
693 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
694 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
695 55c1cdff Florent Chuffart
region1, ##<< Original regions.
696 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
697 6b5a528c Florent Chuffart
) {
698 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
699 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
700 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
701 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
702 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
703 6b5a528c Florent Chuffart
      ret = c()
704 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
705 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
706 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
707 6b5a528c Florent Chuffart
          # remove cur_fuzzy
708 6b5a528c Florent Chuffart
          ret = c()
709 6b5a528c Florent Chuffart
          break
710 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
711 55c1cdff Florent Chuffart
          # crop fuzzy
712 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
713 6b5a528c Florent Chuffart
          ret = cur_fuzzy
714 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
715 55c1cdff Florent Chuffart
          # crop fuzzy
716 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
717 6b5a528c Florent Chuffart
          ret = cur_fuzzy
718 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
719 6b5a528c Florent Chuffart
          # split fuzzy
720 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
721 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
722 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
723 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
724 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
725 6b5a528c Florent Chuffart
          # print(ret)
726 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
727 6b5a528c Florent Chuffart
          break
728 6b5a528c Florent Chuffart
        } else {
729 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
730 55c1cdff Florent Chuffart
        }
731 6b5a528c Florent Chuffart
      }
732 6b5a528c Florent Chuffart
      return(ret)
733 6b5a528c Florent Chuffart
    } else {
734 6b5a528c Florent Chuffart
      return(cur_fuzzy)
735 6b5a528c Florent Chuffart
    }
736 6b5a528c Florent Chuffart
  })
737 6b5a528c Florent Chuffart
  }
738 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
739 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
740 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
741 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
742 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
743 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
744 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
745 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
746 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
747 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
748 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
749 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
750 6b5a528c Florent Chuffart
}
751 6b5a528c Florent Chuffart
752 e5603c3f Florent Chuffart
union_regions = function(# Aggregate regions that intersect themselves.
753 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
754 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
755 6b5a528c Florent Chuffart
) {
756 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
757 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
758 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
759 6b5a528c Florent Chuffart
  new_length = 0
760 55c1cdff Florent Chuffart
761 6b5a528c Florent Chuffart
  while (old_length != new_length) {
762 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
763 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
764 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
765 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
766 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
767 55c1cdff Florent Chuffart
    }
768 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
769 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
770 6b5a528c Florent Chuffart
    }
771 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
772 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
773 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
774 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
775 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
776 4016229d Florent Chuffart
      cur_index = regions$cur_index[vec_beg_1[i]]
777 4016229d Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
778 6b5a528c Florent Chuffart
      })
779 59ad95ca Florent Chuffart
    union = collapse_regions(union)
780 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
781 6b5a528c Florent Chuffart
    new_length = length(union[,1])
782 6b5a528c Florent Chuffart
    regions = union
783 6b5a528c Florent Chuffart
  }
784 6b5a528c Florent Chuffart
  return(union)
785 6b5a528c Florent Chuffart
}
786 6b5a528c Florent Chuffart
787 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
788 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
789 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
790 4016229d Florent Chuffart
# cur_index, ##<< The region of interest index.
791 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
792 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
793 0303dbe8 Florent Chuffart
# ){
794 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
795 4016229d Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
796 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
797 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
798 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
799 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
800 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
801 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
802 0303dbe8 Florent Chuffart
# }
803 6b5a528c Florent Chuffart
804 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
805 e5603c3f Florent Chuffart
### This function is an elaborated call to translate_cur.
806 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
807 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
808 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
809 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
810 6b5a528c Florent Chuffart
roi ##<< The region of interest.
811 6b5a528c Florent Chuffart
) {
812 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
813 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
814 4016229d Florent Chuffart
    big_cur =  roi
815 4016229d Florent Chuffart
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
816 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
817 5bfac5a3 Florent Chuffart
      return(NULL)
818 5bfac5a3 Florent Chuffart
    } else {
819 4016229d Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
820 5bfac5a3 Florent Chuffart
    }
821 5bfac5a3 Florent Chuffart
  })
822 5bfac5a3 Florent Chuffart
823 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
824 59ad95ca Florent Chuffart
}
825 59ad95ca Florent Chuffart
826 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
827 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
828 59ad95ca Florent Chuffart
regions ##< a list of regions
829 59ad95ca Florent Chuffart
) {
830 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
831 5bfac5a3 Florent Chuffart
    return(NULL)
832 5bfac5a3 Florent Chuffart
  } else {
833 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
834 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
835 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
836 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
837 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
838 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
839 5bfac5a3 Florent Chuffart
    return(regions)
840 5bfac5a3 Florent Chuffart
  }
841 6b5a528c Florent Chuffart
}
842 6b5a528c Florent Chuffart
843 6b5a528c Florent Chuffart
844 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
845 4016229d Florent Chuffart
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
846 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
847 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
848 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
849 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
850 6b5a528c Florent Chuffart
) {
851 4016229d Florent Chuffart
  tr_roi = translate_cur(roi, strain, config = config)
852 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
853 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
854 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
855 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
856 6b5a528c Florent Chuffart
  }
857 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
858 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
859 6b5a528c Florent Chuffart
  }
860 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
861 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
862 6b5a528c Florent Chuffart
  }
863 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
864 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
865 6b5a528c Florent Chuffart
  }
866 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
867 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
868 6b5a528c Florent Chuffart
}
869 6b5a528c Florent Chuffart
870 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
871 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
872 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
873 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
874 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
875 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
876 6b5a528c Florent Chuffart
) {
877 6b5a528c Florent Chuffart
	all_reads = NULL
878 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
879 4016229d Florent Chuffart
    if (form == "unr") {
880 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
881 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
882 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
883 6b5a528c Florent Chuffart
      tmp_res$form = form
884 6b5a528c Florent Chuffart
    } else if (form == "wp") {
885 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
886 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
887 6b5a528c Florent Chuffart
      tmp_res$form = form
888 4016229d Florent Chuffart
    } else if (form == "wpunr") {
889 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
890 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
891 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
892 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
893 5bfac5a3 Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
894 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
895 4016229d Florent Chuffart
      tmp_res2$form = "unr"
896 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
897 6b5a528c Florent Chuffart
    }
898 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
899 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
900 55c1cdff Florent Chuffart
		}
901 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
902 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
903 6b5a528c Florent Chuffart
  }
904 5bfac5a3 Florent Chuffart
  return(all_reads)
905 6b5a528c Florent Chuffart
}
906 6b5a528c Florent Chuffart
907 e5603c3f Florent Chuffart
get_design = function(# Build the design for DESeq
908 6b5a528c Florent Chuffart
### This function build the design according sample properties.
909 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
910 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
911 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
912 6b5a528c Florent Chuffart
) {
913 6b5a528c Florent Chuffart
  off1 = 0
914 6b5a528c Florent Chuffart
  off2 = 0
915 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
916 6b5a528c Florent Chuffart
	design_rownames = c()
917 6b5a528c Florent Chuffart
	design_manip = c()
918 6b5a528c Florent Chuffart
	design_strain = c()
919 5bfac5a3 Florent Chuffart
  off2index = function(off) {
920 55c1cdff Florent Chuffart
  	switch(toString(off),
921 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
922 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
923 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
924 6b5a528c Florent Chuffart
  		c(1,1,1)
925 5bfac5a3 Florent Chuffart
  		)
926 6b5a528c Florent Chuffart
  }
927 6b5a528c Florent Chuffart
	for (manip in manips) {
928 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
929 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
930 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
931 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
932 6b5a528c Florent Chuffart
		} else {
933 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
934 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
935 6b5a528c Florent Chuffart
			}
936 6b5a528c Florent Chuffart
		}
937 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
938 5bfac5a3 Florent Chuffart
		for (strain in combi) {
939 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
940 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
941 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
942 6b5a528c Florent Chuffart
		}
943 6b5a528c Florent Chuffart
	}
944 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
945 5bfac5a3 Florent Chuffart
	return(snep_design)
946 6b5a528c Florent Chuffart
}
947 6b5a528c Florent Chuffart
948 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
949 e5603c3f Florent Chuffart
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
950 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
951 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
952 6b5a528c Florent Chuffart
res, ##<< Data
953 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
954 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
955 5bfac5a3 Florent Chuffart
) {
956 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
957 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
958 6b5a528c Florent Chuffart
	snepDesign = data.frame(
959 6b5a528c Florent Chuffart
		row.names = cols,
960 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
961 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
962 6b5a528c Florent Chuffart
		)
963 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
964 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
965 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
966 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
967 6b5a528c Florent Chuffart
	if (NEWPLOT) {
968 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
969 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
970 6b5a528c Florent Chuffart
	}
971 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
972 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
973 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
974 6b5a528c Florent Chuffart
	}
975 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
976 6b5a528c Florent Chuffart
}
977 6b5a528c Florent Chuffart
978 e5603c3f Florent Chuffart
analyse_design = function(# Launch DESeq methods.
979 e5603c3f Florent Chuffart
### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
980 e5603c3f Florent Chuffart
snep_design, ##<< The design to consider.
981 e5603c3f Florent Chuffart
reads ##<< The data to consider.
982 6b5a528c Florent Chuffart
) {
983 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
984 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
985 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
986 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
987 6b5a528c Florent Chuffart
988 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
989 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
990 6b5a528c Florent Chuffart
991 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
992 5bfac5a3 Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))
993 6b5a528c Florent Chuffart
}
994 6b5a528c Florent Chuffart
995 6b5a528c Florent Chuffart
996 6b5a528c Florent Chuffart
997 6b5a528c Florent Chuffart
998 6b5a528c Florent Chuffart
999 6b5a528c Florent Chuffart
1000 6b5a528c Florent Chuffart
1001 6b5a528c Florent Chuffart
1002 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1003 55c1cdff Florent Chuffart
### This function uses
1004 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1005 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1006 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1007 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1008 e5603c3f Florent Chuffart
FDR = 0.0001, ## the specific False Discover Rate
1009 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1010 6b5a528c Florent Chuffart
) {
1011 6b5a528c Florent Chuffart
  # PRETREAT
1012 4016229d Florent Chuffart
  snep_design = get_design(marker, combi, all_samples)
1013 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1014 6b5a528c Florent Chuffart
  # RUN ANALYSE
1015 4016229d Florent Chuffart
  tmp_analyse = analyse_design(snep_design, reads)
1016 6b5a528c Florent Chuffart
  # RESULTS
1017 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1018 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1019 6b5a528c Florent Chuffart
  k = names(fit1)
1020 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1021 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1022 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1023 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1024 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1025 4016229d Florent Chuffart
  # print(snep_design)
1026 e5603c3f Florent Chuffart
	thres = FDR(reads$pvalsGLM, FDR)
1027 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1028 5badc2fd Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
1029 6b5a528c Florent Chuffart
  return(reads)
1030 5bfac5a3 Florent Chuffart
  },  ex=function(){
1031 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1032 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1033 4016229d Florent Chuffart
    form = "wpunr" # "wp" | "unr" | "wpunr"
1034 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1035 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1036 6b5a528c Florent Chuffart
})
1037 6b5a528c Florent Chuffart
1038 6b5a528c Florent Chuffart
1039 6b5a528c Florent Chuffart
1040 6b5a528c Florent Chuffart
1041 6b5a528c Florent Chuffart
1042 6b5a528c Florent Chuffart
1043 6b5a528c Florent Chuffart
1044 6b5a528c Florent Chuffart
1045 6b5a528c Florent Chuffart
1046 6b5a528c Florent Chuffart
1047 6b5a528c Florent Chuffart
1048 6b5a528c Florent Chuffart
1049 6b5a528c Florent Chuffart
1050 6b5a528c Florent Chuffart
1051 6b5a528c Florent Chuffart
1052 6b5a528c Florent Chuffart
1053 6b5a528c Florent Chuffart
1054 6b5a528c Florent Chuffart
1055 6b5a528c Florent Chuffart
1056 6b5a528c Florent Chuffart
1057 6b5a528c Florent Chuffart
1058 6b5a528c Florent Chuffart
1059 6b5a528c Florent Chuffart
1060 6b5a528c Florent Chuffart
1061 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1062 e5603c3f Florent Chuffart
### Utility to convert Roman numbers into Arabic numbers
1063 1d833b97 Florent Chuffart
){list(
1064 1d833b97 Florent Chuffart
  "I" = 1,
1065 1d833b97 Florent Chuffart
  "II" = 2,
1066 1d833b97 Florent Chuffart
  "III" = 3,
1067 1d833b97 Florent Chuffart
  "IV" = 4,
1068 1d833b97 Florent Chuffart
  "V" = 5,
1069 1d833b97 Florent Chuffart
  "VI" = 6,
1070 1d833b97 Florent Chuffart
  "VII" = 7,
1071 1d833b97 Florent Chuffart
  "VIII" = 8,
1072 1d833b97 Florent Chuffart
  "IX" = 9,
1073 1d833b97 Florent Chuffart
  "X" = 10,
1074 1d833b97 Florent Chuffart
  "XI" = 11,
1075 1d833b97 Florent Chuffart
  "XII" = 12,
1076 1d833b97 Florent Chuffart
  "XIII" = 13,
1077 1d833b97 Florent Chuffart
  "XIV" = 14,
1078 1d833b97 Florent Chuffart
  "XV" = 15,
1079 1d833b97 Florent Chuffart
  "XVI" = 16,
1080 1d833b97 Florent Chuffart
  "XVII" = 17,
1081 1d833b97 Florent Chuffart
  "XVIII" = 18,
1082 1d833b97 Florent Chuffart
  "XIX" = 19,
1083 1d833b97 Florent Chuffart
  "XX" = 20
1084 1d833b97 Florent Chuffart
)}
1085 1d833b97 Florent Chuffart
1086 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1087 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1088 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1089 1d833b97 Florent Chuffart
) {
1090 1d833b97 Florent Chuffart
	ret = list()
1091 1d833b97 Florent Chuffart
	for (name in names(l)) {
1092 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1093 1d833b97 Florent Chuffart
	}
1094 1d833b97 Florent Chuffart
	ret
1095 1d833b97 Florent Chuffart
### The switched pairlist.
1096 1d833b97 Florent Chuffart
}, ex=function(){
1097 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1098 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1099 1d833b97 Florent Chuffart
})
1100 1d833b97 Florent Chuffart
1101 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1102 e5603c3f Florent Chuffart
### Utility to convert Arabic numbers to Roman numbers
1103 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1104 1d833b97 Florent Chuffart
1105 1d833b97 Florent Chuffart
1106 1d833b97 Florent Chuffart
1107 1d833b97 Florent Chuffart
1108 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1109 e5603c3f Florent Chuffart
### This fonction allows to access to a specific part of the c2c file.
1110 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1111 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1112 e5603c3f Florent Chuffart
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1113 e5603c3f Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
1114 e5603c3f Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
1115 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1116 1d833b97 Florent Chuffart
) {
1117 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1118 1d833b97 Florent Chuffart
	# Launch c2c file
1119 1d833b97 Florent Chuffart
	if (reverse) {
1120 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1121 1d833b97 Florent Chuffart
	} else {
1122 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1123 1d833b97 Florent Chuffart
	}
1124 5bfac5a3 Florent Chuffart
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1125 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1126 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1127 1d833b97 Florent Chuffart
	# Reverse
1128 1d833b97 Florent Chuffart
	if (reverse) {
1129 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1130 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1131 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1132 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1133 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1134 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1135 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1136 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1137 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1138 1d833b97 Florent Chuffart
	}
1139 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1140 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1141 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1142 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1143 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1144 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1145 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1146 5bfac5a3 Florent Chuffart
  	}
1147 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1148 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1149 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1150 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1151 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1152 5bfac5a3 Florent Chuffart
    }
1153 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1154 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1155 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1156 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1157 5bfac5a3 Florent Chuffart
    }
1158 5bfac5a3 Florent Chuffart
  }
1159 5bfac5a3 Florent Chuffart
  return(c2c)
1160 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1161 5bfac5a3 Florent Chuffart
}
1162 5bfac5a3 Florent Chuffart
1163 4016229d Florent Chuffart
translate_cur = structure(function(# Translate coords of a genome region.
1164 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1165 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1166 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1167 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1168 4016229d Florent Chuffart
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1169 5bfac5a3 Florent Chuffart
) {
1170 d973538c Florent Chuffart
	strain1 = roi$strain_ref  
1171 d973538c Florent Chuffart
  # Do something or nothing?
1172 d973538c Florent Chuffart
  if (is.null(config$NEUTRAL_TRANSLATE_CUR)) {
1173 d973538c Florent Chuffart
    config$NEUTRAL_TRANSLATE_CUR = FALSE
1174 d973538c Florent Chuffart
  }
1175 d973538c Florent Chuffart
	if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) {
1176 d973538c Florent Chuffart
    roi$strain_ref = strain2
1177 d973538c Florent Chuffart
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin)
1178 5bfac5a3 Florent Chuffart
		return(roi)
1179 5bfac5a3 Florent Chuffart
	}
1180 d973538c Florent Chuffart
  
1181 5bfac5a3 Florent Chuffart
	# Extract c2c file
1182 4016229d Florent Chuffart
	if (!is.null(big_cur)) {
1183 4016229d Florent Chuffart
  	# Dealing with big_cur
1184 4016229d Florent Chuffart
		if (roi$strain_ref != big_cur$strain_ref) {
1185 4016229d Florent Chuffart
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1186 62d69d53 Florent Chuffart
    }
1187 4016229d Florent Chuffart
    if (big_cur$end < big_cur$begin) {
1188 4016229d Florent Chuffart
      tmp_var = big_cur$begin
1189 4016229d Florent Chuffart
      big_cur$begin = big_cur$end
1190 4016229d Florent Chuffart
      big_cur$end = tmp_var
1191 4016229d Florent Chuffart
      big_cur$length = big_cur$end - big_cur$begin + 1
1192 55c1cdff Florent Chuffart
    }
1193 4016229d Florent Chuffart
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1194 4016229d Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1195 ded736c8 Florent Chuffart
      return(NULL)
1196 ded736c8 Florent Chuffart
    }
1197 4016229d Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1198 5bfac5a3 Florent Chuffart
  } else {
1199 4016229d Florent Chuffart
    # No big_cur
1200 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1201 5bfac5a3 Florent Chuffart
  }
1202 5bfac5a3 Florent Chuffart
  
1203 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1204 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1205 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1206 1d833b97 Florent Chuffart
  end_1 = roi$end
1207 5bfac5a3 Florent Chuffart
  if (reverse) {
1208 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1209 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1210 1d833b97 Florent Chuffart
	} else {
1211 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1212 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1213 1d833b97 Florent Chuffart
	}
1214 5bfac5a3 Florent Chuffart
1215 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1216 1d833b97 Florent Chuffart
	{
1217 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1218 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1219 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1220 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1221 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1222 54cc8283 Florent Chuffart
      } else {
1223 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1224 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1225 54cc8283 Florent Chuffart
      }
1226 1d833b97 Florent Chuffart
			if (reverse) {
1227 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1228 1d833b97 Florent Chuffart
			} else {
1229 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1230 1d833b97 Florent Chuffart
			}
1231 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1232 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1233 1d833b97 Florent Chuffart
					return(NULL)
1234 4016229d Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1235 55c1cdff Florent Chuffart
				} else {
1236 1d833b97 Florent Chuffart
					print(tmptransfostart)
1237 1d833b97 Florent Chuffart
					print(tmptransfostop)
1238 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1239 1d833b97 Florent Chuffart
				}
1240 1d833b97 Florent Chuffart
			}
1241 55c1cdff Florent Chuffart
		}
1242 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1243 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1244 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1245 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1246 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1247 54cc8283 Florent Chuffart
      } else {
1248 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1249 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1250 54cc8283 Florent Chuffart
      }
1251 1d833b97 Florent Chuffart
			if (reverse) {
1252 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1253 1d833b97 Florent Chuffart
			} else {
1254 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1255 1d833b97 Florent Chuffart
			}
1256 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1257 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1258 1d833b97 Florent Chuffart
					return(NULL)
1259 4016229d Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1260 1d833b97 Florent Chuffart
				} else {
1261 1d833b97 Florent Chuffart
					print(tmptransfostart)
1262 1d833b97 Florent Chuffart
					print(tmptransfostop)
1263 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1264 55c1cdff Florent Chuffart
				}
1265 55c1cdff Florent Chuffart
			}
1266 1d833b97 Florent Chuffart
		}
1267 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1268 1d833b97 Florent Chuffart
			# print("many start")
1269 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1270 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1271 1d833b97 Florent Chuffart
				print(tmptransfostart)
1272 1d833b97 Florent Chuffart
				print(tmptransfostop)
1273 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1274 1d833b97 Florent Chuffart
			}
1275 1d833b97 Florent Chuffart
		}
1276 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1277 1d833b97 Florent Chuffart
			# print("many stop")
1278 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1279 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1280 1d833b97 Florent Chuffart
				print(tmptransfostart)
1281 1d833b97 Florent Chuffart
				print(tmptransfostop)
1282 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1283 1d833b97 Florent Chuffart
			}
1284 5bfac5a3 Florent Chuffart
		}
1285 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1286 1d833b97 Florent Chuffart
			print(tmptransfostart)
1287 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1288 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1289 1d833b97 Florent Chuffart
		}
1290 55c1cdff Florent Chuffart
	}
1291 5bfac5a3 Florent Chuffart
  
1292 1d833b97 Florent Chuffart
  # Deal with strand
1293 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1294 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1295 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1296 1d833b97 Florent Chuffart
  } else {
1297 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1298 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1299 1d833b97 Florent Chuffart
  }
1300 1d833b97 Florent Chuffart
	# Build returned roi
1301 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1302 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1303 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1304 1d833b97 Florent Chuffart
	} else {
1305 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1306 1d833b97 Florent Chuffart
	}
1307 1d833b97 Florent Chuffart
  roi$begin = begin_2
1308 1d833b97 Florent Chuffart
  roi$end = end_2
1309 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1310 5bfac5a3 Florent Chuffart
		roi$length = 1
1311 5bfac5a3 Florent Chuffart
	} else {
1312 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1313 1d833b97 Florent Chuffart
	}
1314 55c1cdff Florent Chuffart
  return(roi)
1315 1d833b97 Florent Chuffart
}, ex=function(){
1316 4016229d Florent Chuffart
	# Define new translate_cur function...
1317 4016229d Florent Chuffart
	translate_cur = function(roi, strain2, config) {
1318 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1319 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1320 1d833b97 Florent Chuffart
			return(roi)
1321 1d833b97 Florent Chuffart
		} else {
1322 4016229d Florent Chuffart
		  stop("Here is my new translate_cur function...")
1323 5bfac5a3 Florent Chuffart
		}
1324 1d833b97 Florent Chuffart
	}
1325 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1326 4016229d Florent Chuffart
	# unlockBinding("translate_cur", as.environment("package:nm"))
1327 4016229d Florent Chuffart
	# unlockBinding("translate_cur", getNamespace("nm"))
1328 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, "package:nm")
1329 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1330 4016229d Florent Chuffart
	# lockBinding("translate_cur", getNamespace("nm"))
1331 4016229d Florent Chuffart
	# lockBinding("translate_cur", as.environment("package:nm"))
1332 1d833b97 Florent Chuffart
})
1333 1d833b97 Florent Chuffart
1334 1d833b97 Florent Chuffart
1335 5bfac5a3 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1336 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1337 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1338 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1339 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1340 5bfac5a3 Florent Chuffart
) {
1341 1d833b97 Florent Chuffart
1342 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1343 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1344 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1345 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1346 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1347 5bfac5a3 Florent Chuffart
    } else {
1348 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1349 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1350 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1351 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1352 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1353 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1354 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1355 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1356 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1357 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1358 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1359 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1360 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1361 5bfac5a3 Florent Chuffart
      }
1362 5bfac5a3 Florent Chuffart
      return(NULL)
1363 5bfac5a3 Florent Chuffart
    }
1364 5bfac5a3 Florent Chuffart
  }
1365 1d833b97 Florent Chuffart
1366 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1367 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1368 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1369 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1370 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1371 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1372 5bfac5a3 Florent Chuffart
  	}
1373 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1374 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1375 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1376 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1377 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1378 5bfac5a3 Florent Chuffart
  	}
1379 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1380 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1381 5bfac5a3 Florent Chuffart
  	}
1382 5bfac5a3 Florent Chuffart
  }
1383 1d833b97 Florent Chuffart
1384 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1385 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1386 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1387 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1388 5bfac5a3 Florent Chuffart
      if (!ok) {
1389 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1390 5bfac5a3 Florent Chuffart
      }
1391 5bfac5a3 Florent Chuffart
      return (ok)
1392 5bfac5a3 Florent Chuffart
    }
1393 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1394 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1395 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1396 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1397 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1398 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1399 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1400 5bfac5a3 Florent Chuffart
        return(roi)
1401 5bfac5a3 Florent Chuffart
      })
1402 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1403 5bfac5a3 Florent Chuffart
    }
1404 5bfac5a3 Florent Chuffart
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1405 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1406 5bfac5a3 Florent Chuffart
    STOP = FALSE
1407 5bfac5a3 Florent Chuffart
    nb_round = 0
1408 5bfac5a3 Florent Chuffart
    while(!STOP) {
1409 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1410 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1411 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1412 4016229d Florent Chuffart
      fooyjm = translate_curs(foo, "YJM", config=config)
1413 4016229d Florent Chuffart
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1414 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1415 5bfac5a3 Florent Chuffart
        print("case 1")
1416 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1417 5bfac5a3 Florent Chuffart
    		next
1418 5bfac5a3 Florent Chuffart
      }
1419 4016229d Florent Chuffart
      foorm = translate_curs(foo, "RM", config=config)
1420 4016229d Florent Chuffart
      foormby = translate_curs(foorm, "BY", config=config)
1421 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1422 5bfac5a3 Florent Chuffart
        print("case 2")
1423 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1424 5bfac5a3 Florent Chuffart
    		next
1425 5bfac5a3 Florent Chuffart
      }
1426 4016229d Florent Chuffart
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1427 4016229d Florent Chuffart
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1428 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1429 5bfac5a3 Florent Chuffart
        print("case 3")
1430 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1431 5bfac5a3 Florent Chuffart
        next
1432 5bfac5a3 Florent Chuffart
      }
1433 4016229d Florent Chuffart
      foormyjm = translate_curs(foorm, "YJM", config=config)
1434 4016229d Florent Chuffart
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1435 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1436 5bfac5a3 Florent Chuffart
        print("case 4")
1437 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1438 5bfac5a3 Florent Chuffart
        next
1439 5bfac5a3 Florent Chuffart
      }
1440 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1441 5bfac5a3 Florent Chuffart
      STOP = TRUE
1442 5bfac5a3 Florent Chuffart
    }
1443 5bfac5a3 Florent Chuffart
    STOP = FALSE
1444 5bfac5a3 Florent Chuffart
    nb_round = 0
1445 5bfac5a3 Florent Chuffart
    while(!STOP) {
1446 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1447 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1448 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1449 4016229d Florent Chuffart
      foobyrm = translate_curs(fooby, "RM", config=config)
1450 4016229d Florent Chuffart
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1451 4016229d Florent Chuffart
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1452 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1453 5bfac5a3 Florent Chuffart
        print("case 1")
1454 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1455 5bfac5a3 Florent Chuffart
      }
1456 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1457 4016229d Florent Chuffart
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1458 4016229d Florent Chuffart
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1459 4016229d Florent Chuffart
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1460 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1461 5bfac5a3 Florent Chuffart
        print("case 2")
1462 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1463 5bfac5a3 Florent Chuffart
        next
1464 5bfac5a3 Florent Chuffart
      }
1465 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1466 5bfac5a3 Florent Chuffart
      STOP = TRUE
1467 5bfac5a3 Florent Chuffart
    }
1468 5bfac5a3 Florent Chuffart
    print("end")
1469 5bfac5a3 Florent Chuffart
    return(foo)
1470 5bfac5a3 Florent Chuffart
  }
1471 1d833b97 Florent Chuffart
1472 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1473 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1474 5bfac5a3 Florent Chuffart
    # computing diffs
1475 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1476 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1477 1d833b97 Florent Chuffart
    # Filtering
1478 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1479 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1480 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1481 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1482 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1483 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1484 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1485 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1486 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1487 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1488 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1489 5bfac5a3 Florent Chuffart
  		} else {
1490 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1491 1d833b97 Florent Chuffart
  		}
1492 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1493 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1494 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1495 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1496 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1497 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1498 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1499 5bfac5a3 Florent Chuffart
          # print(check)
1500 5bfac5a3 Florent Chuffart
      		roi1 = roi
1501 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1502 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1503 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1504 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1505 5bfac5a3 Florent Chuffart
          }
1506 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1507 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1508 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1509 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1510 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1511 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1512 5bfac5a3 Florent Chuffart
          } else {
1513 5bfac5a3 Florent Chuffart
            check = NULL
1514 5bfac5a3 Florent Chuffart
            roi = NULL
1515 5bfac5a3 Florent Chuffart
          }
1516 5bfac5a3 Florent Chuffart
        }
1517 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1518 1d833b97 Florent Chuffart
  	  }
1519 5bfac5a3 Florent Chuffart
    })
1520 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1521 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1522 1d833b97 Florent Chuffart
  	return(rois)
1523 1d833b97 Florent Chuffart
  }
1524 1d833b97 Florent Chuffart
1525 4016229d Florent Chuffart
  translate_curs = function(rois, target_strain, config) {
1526 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1527 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1528 4016229d Florent Chuffart
      tr_roi = translate_cur(roi, target_strain, config=config)  
1529 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1530 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1531 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1532 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1533 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1534 5bfac5a3 Florent Chuffart
      return(tr_roi)
1535 5bfac5a3 Florent Chuffart
    })
1536 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1537 5bfac5a3 Florent Chuffart
    return(tr_rois)
1538 5bfac5a3 Florent Chuffart
  }
1539 729c934e Florent Chuffart
1540 5bfac5a3 Florent Chuffart
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1541 5bfac5a3 Florent Chuffart
  rois = list()
1542 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1543 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1544 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1545 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1546 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1547 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1548 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1549 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1550 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1551 4016229d Florent Chuffart
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1552 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1553 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1554 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1555 5bfac5a3 Florent Chuffart
  }
1556 5bfac5a3 Florent Chuffart
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1557 5bfac5a3 Florent Chuffart
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1558 4016229d Florent Chuffart
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1559 5bfac5a3 Florent Chuffart
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1560 5bfac5a3 Florent Chuffart
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1561 4016229d Florent Chuffart
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1562 5bfac5a3 Florent Chuffart
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1563 5bfac5a3 Florent Chuffart
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1564 5bfac5a3 Florent Chuffart
  return (squeezed_rois)
1565 1d833b97 Florent Chuffart
}
1566 1d833b97 Florent Chuffart
1567 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1568 5bfac5a3 Florent Chuffart
### This function...
1569 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1570 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1571 5bfac5a3 Florent Chuffart
) {
1572 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1573 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1574 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1575 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1576 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1577 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1578 5bfac5a3 Florent Chuffart
                                , ]
1579 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1580 5bfac5a3 Florent Chuffart
      print("removing a region")
1581 5bfac5a3 Florent Chuffart
      return(NULL)
1582 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1583 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1584 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1585 5bfac5a3 Florent Chuffart
    } else {
1586 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1587 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1588 5bfac5a3 Florent Chuffart
        print("not the same begin")
1589 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1590 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1591 5bfac5a3 Florent Chuffart
      }
1592 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1593 5bfac5a3 Florent Chuffart
        print("not the same end")
1594 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1595 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1596 5bfac5a3 Florent Chuffart
      }
1597 5bfac5a3 Florent Chuffart
      return(roi1)
1598 5bfac5a3 Florent Chuffart
    }         
1599 5bfac5a3 Florent Chuffart
  })
1600 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1601 5bfac5a3 Florent Chuffart
}
1602 1d833b97 Florent Chuffart
1603 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1604 e5603c3f Florent Chuffart
### This function loads in memory the data corresponding to the given experiments.
1605 e5603c3f Florent Chuffart
expe, ##<< a list of vectors corresponding to replicates.
1606 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1607 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1608 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1609 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1610 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1611 61914235 Florent Chuffart
) {
1612 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1613 61914235 Florent Chuffart
  	samples=list()
1614 61914235 Florent Chuffart
  	for (i in samples_ids) {
1615 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1616 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1617 4016229d Florent Chuffart
      sample$roi = translate_cur(roi, sample$strain, config = config)
1618 61914235 Florent Chuffart
  		if (get_genome) {
1619 61914235 Florent Chuffart
  			# Get Genome
1620 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1621 5bfac5a3 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1622 61914235 Florent Chuffart
  		}
1623 61914235 Florent Chuffart
  		# Get inputs
1624 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1625 55c1cdff Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1626 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
1627 61914235 Florent Chuffart
  		if (!only_fetch) {
1628 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1629 61914235 Florent Chuffart
  	  }
1630 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1631 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1632 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1633 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1634 61914235 Florent Chuffart
  			if (!only_fetch) {
1635 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1636 61914235 Florent Chuffart
    		}
1637 61914235 Florent Chuffart
  		}
1638 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
1639 61914235 Florent Chuffart
  	}
1640 61914235 Florent Chuffart
  	return(samples)
1641 61914235 Florent Chuffart
  }
1642 61914235 Florent Chuffart
	replicates = list()
1643 61914235 Florent Chuffart
	for(samples_ids in expe) {
1644 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1645 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1646 61914235 Florent Chuffart
	}
1647 61914235 Florent Chuffart
	return(replicates)
1648 61914235 Florent Chuffart
  }, ex = function() {
1649 61914235 Florent Chuffart
    # library(rjson)
1650 61914235 Florent Chuffart
    # library(nucleominer)
1651 55c1cdff Florent Chuffart
    #
1652 61914235 Florent Chuffart
    # # Read config file
1653 e5603c3f Florent Chuffart
    # json_conf_file = "nucleominer_config.json"
1654 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1655 61914235 Florent Chuffart
    # # Read sample file
1656 55c1cdff Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1657 61914235 Florent Chuffart
    # # here are the sample ids in a list
1658 61914235 Florent Chuffart
    # expes = list(c(1))
1659 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1660 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1661 61914235 Florent Chuffart
    # # it displays the corverage
1662 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1663 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
1664 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
1665 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
1666 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
1667 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
1668 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
1669 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
1670 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
1671 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
1672 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
1673 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
1674 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
1675 61914235 Florent Chuffart
    #       height = 50)
1676 61914235 Florent Chuffart
  })
1677 61914235 Florent Chuffart
1678 61914235 Florent Chuffart
1679 61914235 Florent Chuffart
1680 61914235 Florent Chuffart
1681 61914235 Florent Chuffart
1682 61914235 Florent Chuffart
1683 61914235 Florent Chuffart
1684 61914235 Florent Chuffart
1685 61914235 Florent Chuffart
1686 61914235 Florent Chuffart
1687 61914235 Florent Chuffart
1688 61914235 Florent Chuffart
1689 61914235 Florent Chuffart
1690 61914235 Florent Chuffart
1691 61914235 Florent Chuffart
1692 61914235 Florent Chuffart
1693 61914235 Florent Chuffart
1694 61914235 Florent Chuffart
1695 61914235 Florent Chuffart
1696 61914235 Florent Chuffart
1697 61914235 Florent Chuffart
1698 61914235 Florent Chuffart
1699 61914235 Florent Chuffart
1700 61914235 Florent Chuffart
1701 61914235 Florent Chuffart
1702 61914235 Florent Chuffart
1703 61914235 Florent Chuffart
1704 61914235 Florent Chuffart
1705 61914235 Florent Chuffart
1706 61914235 Florent Chuffart
1707 61914235 Florent Chuffart
1708 61914235 Florent Chuffart
1709 61914235 Florent Chuffart
1710 1d833b97 Florent Chuffart
1711 1d833b97 Florent Chuffart
1712 1d833b97 Florent Chuffart
1713 1d833b97 Florent Chuffart
1714 1d833b97 Florent Chuffart
1715 1d833b97 Florent Chuffart
1716 1d833b97 Florent Chuffart
1717 1d833b97 Florent Chuffart
1718 1d833b97 Florent Chuffart
1719 1d833b97 Florent Chuffart
1720 1d833b97 Florent Chuffart
1721 1d833b97 Florent Chuffart
1722 1d833b97 Florent Chuffart
1723 1d833b97 Florent Chuffart
1724 1d833b97 Florent Chuffart
1725 1d833b97 Florent Chuffart
1726 1d833b97 Florent Chuffart
1727 1d833b97 Florent Chuffart
1728 1d833b97 Florent Chuffart
1729 1d833b97 Florent Chuffart
1730 1d833b97 Florent Chuffart
1731 1d833b97 Florent Chuffart
1732 1d833b97 Florent Chuffart
1733 1d833b97 Florent Chuffart
1734 1d833b97 Florent Chuffart
1735 1d833b97 Florent Chuffart
1736 1d833b97 Florent Chuffart
1737 1d833b97 Florent Chuffart
1738 1d833b97 Florent Chuffart
1739 1d833b97 Florent Chuffart
1740 1d833b97 Florent Chuffart
1741 1d833b97 Florent Chuffart
1742 1d833b97 Florent Chuffart
1743 1d833b97 Florent Chuffart
1744 6b5a528c Florent Chuffart
1745 6b5a528c Florent Chuffart
1746 6b5a528c Florent Chuffart
1747 6b5a528c Florent Chuffart
1748 6b5a528c Florent Chuffart
1749 6b5a528c Florent Chuffart
1750 6b5a528c Florent Chuffart
1751 6b5a528c Florent Chuffart
1752 6b5a528c Florent Chuffart
1753 6b5a528c Florent Chuffart
1754 6b5a528c Florent Chuffart
1755 6b5a528c Florent Chuffart
1756 6b5a528c Florent Chuffart
1757 6b5a528c Florent Chuffart
1758 6b5a528c Florent Chuffart
1759 6b5a528c Florent Chuffart
1760 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
1761 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
1762 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
1763 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
1764 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1765 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1766 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1767 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1768 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1769 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1770 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1771 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1772 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
1773 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1774 9fdbfada Florent Chuffart
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1775 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1776 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1777 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1778 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1779 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1780 6e0010bc Florent Chuffart
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1781 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1782 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1783 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1784 6e0010bc Florent Chuffart
main=NULL, ##<< main title of the produced plot
1785 6e0010bc Florent Chuffart
xlab=NULL, ##<< xlab of the produced plot
1786 6e0010bc Florent Chuffart
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1787 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1788 6b5a528c Florent Chuffart
){
1789 6b5a528c Florent Chuffart
  returned_list = list()
1790 6b5a528c Florent Chuffart
  # Computing global display parameters
1791 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1792 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1793 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1794 6b5a528c Florent Chuffart
  } else {
1795 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1796 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1797 6b5a528c Florent Chuffart
  }
1798 6b5a528c Florent Chuffart
	base_glo = 0
1799 6b5a528c Florent Chuffart
	nb_rank_glo = 0
1800 6b5a528c Florent Chuffart
  for (samples in replicates) {
1801 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
1802 6b5a528c Florent Chuffart
  }
1803 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1804 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
1805 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
1806 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
1807 6b5a528c Florent Chuffart
  # Plot main frame
1808 6e0010bc Florent Chuffart
  if (is.null(xlab)) {
1809 6e0010bc Florent Chuffart
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1810 6e0010bc Florent Chuffart
  }
1811 6e0010bc Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1812 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1813 6b5a528c Florent Chuffart
  # Go
1814 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
1815 9fdbfada Florent Chuffart
  wp_maps = list()
1816 9fdbfada Florent Chuffart
  fuzzy_maps = list()
1817 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
1818 6b5a528c Florent Chuffart
		# Computing replicate parameters
1819 6b5a528c Florent Chuffart
		nb_rank = length(samples)
1820 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
1821 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
1822 6b5a528c Florent Chuffart
		y_min = min(ylim)
1823 6b5a528c Florent Chuffart
		y_max = max(ylim)
1824 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
1825 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
1826 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
1827 6b5a528c Florent Chuffart
			# computing sample parameters
1828 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
1829 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1830 6e0010bc Florent Chuffart
      if (plot_sample_id) {
1831 6e0010bc Florent Chuffart
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1832 6e0010bc Florent Chuffart
      }
1833 6b5a528c Florent Chuffart
1834 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1835 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
1836 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
1837 6b5a528c Florent Chuffart
		  } else {
1838 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
1839 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
1840 5bfac5a3 Florent Chuffart
		  }
1841 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
1842 6b5a528c Florent Chuffart
	    # Plot Genome seq
1843 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
1844 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1845 6b5a528c Florent Chuffart
		  }
1846 6b5a528c Florent Chuffart
			# Plot reads
1847 6b5a528c Florent Chuffart
			reads = sample$inputs
1848 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
1849 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
1850 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1851 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
1852 6b5a528c Florent Chuffart
			}
1853 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
1854 6b5a528c Florent Chuffart
        # require(plotrix)
1855 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1856 6b5a528c Florent Chuffart
			}
1857 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
1858 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
1859 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
1860 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
1861 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1862 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
1863 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
1864 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
1865 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1866 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1867 a52f85d7 Florent Chuffart
          }
1868 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
1869 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
1870 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
1871 a52f85d7 Florent Chuffart
          tmp_current_level = 0
1872 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
1873 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
1874 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
1875 a52f85d7 Florent Chuffart
          }
1876 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
1877 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
1878 a52f85d7 Florent Chuffart
        } else {
1879 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
1880 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
1881 6b5a528c Florent Chuffart
        }
1882 0d1475ac Florent Chuffart
        # print(steps_x)
1883 0d1475ac Florent Chuffart
        # print(steps_y)
1884 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1885 6b5a528c Florent Chuffart
        abline(y_lev,0)
1886 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1887 6b5a528c Florent Chuffart
			}
1888 6b5a528c Florent Chuffart
			# Plot nucs
1889 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1890 6b5a528c Florent Chuffart
				nucs = sample$outputs
1891 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
1892 6b5a528c Florent Chuffart
					col = 1
1893 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
1894 7646593d Florent Chuffart
            if (change_col) {
1895 55c1cdff Florent Chuffart
  						col = col + 1
1896 6e0010bc Florent Chuffart
              } else {
1897 6e0010bc Florent Chuffart
                col = "blue"
1898 6e0010bc Florent Chuffart
              }
1899 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
1900 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1901 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1902 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
1903 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
1904 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1905 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
1906 6b5a528c Florent Chuffart
						}
1907 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1908 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1909 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1910 6b5a528c Florent Chuffart
		  			}
1911 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
1912 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
1913 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1914 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1915 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1916 6b5a528c Florent Chuffart
	          }
1917 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
1918 6e0010bc Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1919 6b5a528c Florent Chuffart
	          }
1920 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
1921 6b5a528c Florent Chuffart
				      # require(plotrix)
1922 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1923 6b5a528c Florent Chuffart
						}
1924 6b5a528c Florent Chuffart
		      }
1925 6b5a528c Florent Chuffart
		    } else {
1926 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
1927 6b5a528c Florent Chuffart
		    }
1928 6b5a528c Florent Chuffart
			}
1929 6b5a528c Florent Chuffart
	  }
1930 55c1cdff Florent Chuffart
1931 6b5a528c Florent Chuffart
	  # Plot wp nucs
1932 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1933 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
1934 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
1935 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1936 6b5a528c Florent Chuffart
				} else {
1937 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1938 6b5a528c Florent Chuffart
				}
1939 6b5a528c Florent Chuffart
		  } else {
1940 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
1941 6b5a528c Florent Chuffart
		  }
1942 729c934e Florent Chuffart
      if (plot_chain) {
1943 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
1944 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
1945 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
1946 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
1947 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
1948 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
1949 729c934e Florent Chuffart
            # print(tmp_nuc)
1950 729c934e Florent Chuffart
            return(tmp_nuc)
1951 55c1cdff Florent Chuffart
          })
1952 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
1953 dc58ee6c Florent Chuffart
        })
1954 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
1955 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
1956 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
1957 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
1958 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
1959 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
1960 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
1961 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
1962 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
1963 6e0010bc Florent Chuffart
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
1964 e5603c3f Florent Chuffart
        if (is.null(config$TRACK_LLR_OFFSET)) {
1965 e5603c3f Florent Chuffart
          config$TRACK_LLR_OFFSET = 0
1966 6e0010bc Florent Chuffart
        }
1967 e5603c3f Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
1968 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
1969 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
1970 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
1971 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
1972 7e2d37e1 Florent Chuffart
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
1973 55c1cdff Florent Chuffart
        new_tmp_x = c()
1974 55c1cdff Florent Chuffart
        new_tmp_y = c()
1975 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
1976 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
1977 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
1978 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
1979 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
1980 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
1981 ec2936ea Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lwd=2)
1982 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
1983 ec2936ea Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lwd=2)
1984 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
1985 e5603c3f Florent Chuffart
        if (is.null(config$LEGEND_LLR_POS)) {
1986 6e0010bc Florent Chuffart
          pos = 2
1987 6e0010bc Florent Chuffart
        } else {
1988 e5603c3f Florent Chuffart
          pos = config$LEGEND_LLR_POS
1989 6e0010bc Florent Chuffart
        }
1990 7e2d37e1 Florent Chuffart
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
1991 7e2d37e1 Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
1992 37b11e9b Florent Chuffart
        
1993 729c934e Florent Chuffart
      }
1994 55c1cdff Florent Chuffart
1995 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
1996 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
1997 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
1998 d973538c Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
1999 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2000 9fdbfada Florent Chuffart
2001 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2002 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2003 4016229d Florent Chuffart
          if (!is.null(fuzzy_map)) {
2004 4016229d Florent Chuffart
            if (nrow(fuzzy_map) > 0) {
2005 4016229d Florent Chuffart
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2006 4016229d Florent Chuffart
            }
2007 d492bca4 Florent Chuffart
          }
2008 9fdbfada Florent Chuffart
        } 
2009 9fdbfada Florent Chuffart
2010 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2011 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2012 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2013 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2014 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2015 9fdbfada Florent Chuffart
      					all_original_reads = c()
2016 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2017 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2018 9fdbfada Florent Chuffart
      					}
2019 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2020 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2021 9fdbfada Florent Chuffart
    				  }
2022 729c934e Florent Chuffart
  				  }
2023 729c934e Florent Chuffart
  				}
2024 729c934e Florent Chuffart
  			}
2025 729c934e Florent Chuffart
      }
2026 6b5a528c Florent Chuffart
		}
2027 5bfac5a3 Florent Chuffart
	}
2028 55c1cdff Florent Chuffart
2029 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2030 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2031 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2032 b69443b5 Florent Chuffart
      if (!is.null(aligned_inter_strain_nucs)) {
2033 4016229d Florent Chuffart
        aligned_inter_strain_nucs$cur_index = "foo" 
2034 b69443b5 Florent Chuffart
      }
2035 9fdbfada Florent Chuffart
    }
2036 55c1cdff Florent Chuffart
2037 9fdbfada Florent Chuffart
    #Plot common wp nucs
2038 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2039 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2040 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2041 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2042 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2043 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2044 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2045 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2046 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2047 9fdbfada Florent Chuffart
2048 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2049 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2050 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2051 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2052 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2053 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2054 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2055 9fdbfada Florent Chuffart
        } else {
2056 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2057 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2058 6b5a528c Florent Chuffart
        }
2059 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2060 55c1cdff Florent Chuffart
      }
2061 9fdbfada Florent Chuffart
    }
2062 b69443b5 Florent Chuffart
    print(aligned_inter_strain_nucs)
2063 b69443b5 Florent Chuffart
    if (!is.null(aligned_inter_strain_nucs)) {
2064 b69443b5 Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2065 b69443b5 Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2066 b69443b5 Florent Chuffart
        tmp_xs = tmp_ys = c()
2067 b69443b5 Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2068 b69443b5 Florent Chuffart
          samples = replicates[[replicate_rank]]
2069 b69443b5 Florent Chuffart
          strain = samples[[1]]$strain
2070 b69443b5 Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2071 b69443b5 Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2072 b69443b5 Florent Chuffart
        }
2073 37b11e9b Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2074 55c1cdff Florent Chuffart
      }
2075 9fdbfada Florent Chuffart
    }
2076 9fdbfada Florent Chuffart
    
2077 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2078 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2079 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2080 4016229d Florent Chuffart
      cur_index = "foo"
2081 9fdbfada Florent Chuffart
      common_nuc_results = list()
2082 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2083 4016229d Florent Chuffart
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2084 ec2936ea Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))        
2085 9fdbfada Florent Chuffart
    }
2086 9fdbfada Florent Chuffart
2087 6b5a528c Florent Chuffart
	}
2088 6b5a528c Florent Chuffart
  return(returned_list)
2089 6b5a528c Florent Chuffart
}
2090 6b5a528c Florent Chuffart
2091 6b5a528c Florent Chuffart
2092 6b5a528c Florent Chuffart
2093 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2094 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2095 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2096 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2097 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2098 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2099 9fdbfada Florent Chuffart
) {
2100 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2101 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2102 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2103 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2104 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2105 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2106 9fdbfada Florent Chuffart
    }
2107 9fdbfada Florent Chuffart
  }
2108 9fdbfada Florent Chuffart
  return(fuzzy_map)
2109 9fdbfada Florent Chuffart
}
2110 9fdbfada Florent Chuffart
2111 9fdbfada Florent Chuffart
2112 9fdbfada Florent Chuffart
2113 9fdbfada Florent Chuffart
2114 9fdbfada Florent Chuffart
2115 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2116 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2117 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2118 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2119 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
2120 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2121 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2122 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2123 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2124 9fdbfada Florent Chuffart
) {
2125 4016229d Florent Chuffart
  # print(cur_index)
2126 9fdbfada Florent Chuffart
2127 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2128 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2129 4016229d Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2130 9fdbfada Florent Chuffart
2131 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2132 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2133 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2134 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2135 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2136 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2137 9fdbfada Florent Chuffart
  # Let's go!
2138 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2139 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2140 9fdbfada Florent Chuffart
      return(NULL)
2141 9fdbfada Florent Chuffart
    } else {
2142 9fdbfada Florent Chuffart
      return (index_nuc)
2143 9fdbfada Florent Chuffart
    }
2144 9fdbfada Florent Chuffart
  }))
2145 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2146 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2147 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2148 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2149 9fdbfada Florent Chuffart
  }
2150 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2151 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2152 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2153 9fdbfada Florent Chuffart
2154 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2155 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2156 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2157 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2158 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2159 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2160 9fdbfada Florent Chuffart
  # Let's go!
2161 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2162 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2163 9fdbfada Florent Chuffart
      return(NULL)
2164 9fdbfada Florent Chuffart
    } else {
2165 9fdbfada Florent Chuffart
      return (index_nuc)
2166 9fdbfada Florent Chuffart
    }
2167 9fdbfada Florent Chuffart
  }))
2168 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2169 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2170 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2171 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2172 9fdbfada Florent Chuffart
  }
2173 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2174 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2175 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2176 4016229d Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2177 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2178 9fdbfada Florent Chuffart
2179 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2180 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2181 9fdbfada Florent Chuffart
2182 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2183 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2184 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2185 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2186 9fdbfada Florent Chuffart
  # Let's go!
2187 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2188 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2189 9fdbfada Florent Chuffart
      return (index_nuc)
2190 9fdbfada Florent Chuffart
    } else {
2191 9fdbfada Florent Chuffart
      return(NULL)
2192 9fdbfada Florent Chuffart
    }
2193 9fdbfada Florent Chuffart
  }))
2194 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2195 9fdbfada Florent Chuffart
  
2196 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2197 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2198 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2199 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2200 9fdbfada Florent Chuffart
  # Let's go!
2201 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2202 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2203 9fdbfada Florent Chuffart
      return (index_nuc)
2204 9fdbfada Florent Chuffart
    } else {
2205 9fdbfada Florent Chuffart
      return(NULL)
2206 9fdbfada Florent Chuffart
    }
2207 9fdbfada Florent Chuffart
  }))
2208 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2209 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2210 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2211 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2212 9fdbfada Florent Chuffart
  } else {
2213 4016229d Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2214 9fdbfada Florent Chuffart
  }
2215 9fdbfada Florent Chuffart
2216 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2217 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2218 9fdbfada Florent Chuffart
2219 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2220 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2221 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2222 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2223 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2224 37b11e9b Florent Chuffart
  min_unr_width = 75
2225 37b11e9b Florent Chuffart
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2226 9fdbfada Florent Chuffart
2227 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2228 9fdbfada Florent Chuffart
  return (non_inter_unr)
2229 9fdbfada Florent Chuffart
}
2230 9fdbfada Florent Chuffart
2231 9fdbfada Florent Chuffart