Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ cc54d805

Historique | Voir | Annoter | Télécharger (87,29 ko)

1 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
2 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
3 55c1cdff Florent Chuffart
##author<< Gael Yvert,
4 6b5a528c Florent Chuffart
(
5 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
6 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
7 6b5a528c Florent Chuffart
) {
8 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
9 6b5a528c Florent Chuffart
  N = length(x)
10 6b5a528c Florent Chuffart
  i = 1;
11 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
12 6b5a528c Florent Chuffart
  if (i == 1)
13 6b5a528c Florent Chuffart
    return (NA)
14 6b5a528c Florent Chuffart
  else
15 6b5a528c Florent Chuffart
    return( x[i-1] )
16 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
17 6b5a528c Florent Chuffart
}, ex=function(){
18 6b5a528c Florent Chuffart
  print("example")
19 6b5a528c Florent Chuffart
})
20 6b5a528c Florent Chuffart
21 6b5a528c Florent Chuffart
22 6b5a528c Florent Chuffart
23 7e2d37e1 Florent Chuffart
llr_score_nvecs = structure(function # Likelihood ratio
24 7e2d37e1 Florent Chuffart
### Compute the log likelihood ratio of two or more set of value.
25 6b5a528c Florent Chuffart
(
26 7e2d37e1 Florent Chuffart
  xs ##<< list of vectors.
27 6b5a528c Florent Chuffart
) {
28 7e2d37e1 Florent Chuffart
  l = length(xs)
29 7e2d37e1 Florent Chuffart
  if (l < 1) {
30 7e2d37e1 Florent Chuffart
    return(NA)
31 7e2d37e1 Florent Chuffart
  }
32 7e2d37e1 Florent Chuffart
  if (l == 1) {
33 7e2d37e1 Florent Chuffart
    return(1)
34 7e2d37e1 Florent Chuffart
  }
35 7e2d37e1 Florent Chuffart
  sumllX = 0
36 7e2d37e1 Florent Chuffart
  for (i in 1:l) {
37 7e2d37e1 Florent Chuffart
    x = xs[[i]]
38 7e2d37e1 Florent Chuffart
  	if (length(x) <= 1) {
39 7e2d37e1 Florent Chuffart
  		return(NA)
40 7e2d37e1 Florent Chuffart
  	}
41 7e2d37e1 Florent Chuffart
    meanX = mean(x)
42 7e2d37e1 Florent Chuffart
    sdX = sd(x)
43 7e2d37e1 Florent Chuffart
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
44 7e2d37e1 Florent Chuffart
    sumllX = sumllX + llX
45 7e2d37e1 Florent Chuffart
  }
46 7e2d37e1 Florent Chuffart
  meanXglo = mean(unlist(xs))
47 7e2d37e1 Florent Chuffart
  sdXglo = sd(unlist(xs))
48 7e2d37e1 Florent Chuffart
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
49 7e2d37e1 Florent Chuffart
  ratio = sumllX - llXYZ
50 6b5a528c Florent Chuffart
  return(ratio)
51 7e2d37e1 Florent Chuffart
### Returns the log likelihood ratio.
52 6b5a528c Florent Chuffart
}, ex=function(){
53 e5603c3f Florent Chuffart
  # LLR score for 2 set of values
54 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
55 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
56 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
57 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
58 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
59 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
60 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
61 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
62 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
63 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
64 7e2d37e1 Florent Chuffart
  llr_score_nvecs(list(x1,x2))
65 6b5a528c Florent Chuffart
 })
66 6b5a528c Florent Chuffart
67 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
68 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
69 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
70 6b5a528c Florent Chuffart
		l ##<<  A list
71 6b5a528c Florent Chuffart
	) {
72 6b5a528c Florent Chuffart
  if (is.null(df)) {
73 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
74 6b5a528c Florent Chuffart
  } else {
75 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
76 6b5a528c Florent Chuffart
  }
77 6b5a528c Florent Chuffart
  return(df)
78 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
79 6b5a528c Florent Chuffart
}, ex=function(){
80 6b5a528c Florent Chuffart
		## Here dataframe is NULL
81 6b5a528c Florent Chuffart
		print(df)
82 6b5a528c Florent Chuffart
		df = NULL
83 5bfac5a3 Florent Chuffart
84 6b5a528c Florent Chuffart
		# Initialize df
85 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
86 6b5a528c Florent Chuffart
		print(df)
87 5bfac5a3 Florent Chuffart
88 6b5a528c Florent Chuffart
		# Adding elements to df
89 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
90 6b5a528c Florent Chuffart
		print(df)
91 6b5a528c Florent Chuffart
})
92 6b5a528c Florent Chuffart
93 6b5a528c Florent Chuffart
94 6b5a528c Florent Chuffart
sign_from_strand = function(
95 55c1cdff Florent Chuffart
### Get the sign of strand
96 6b5a528c Florent Chuffart
strands) {
97 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
98 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
99 6b5a528c Florent Chuffart
}
100 6b5a528c Florent Chuffart
101 6b5a528c Florent Chuffart
flat_reads = function(
102 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
103 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
104 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
105 6b5a528c Florent Chuffart
) {
106 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
107 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
108 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
109 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
110 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
111 6b5a528c Florent Chuffart
}
112 6b5a528c Florent Chuffart
113 6b5a528c Florent Chuffart
114 6b5a528c Florent Chuffart
115 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
116 ec2936ea Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold.
117 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
118 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
119 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
120 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
121 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
122 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
123 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
124 6b5a528c Florent Chuffart
) {
125 6b5a528c Florent Chuffart
  if (x_min < 0) {
126 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
127 6b5a528c Florent Chuffart
	} else {
128 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
129 6b5a528c Florent Chuffart
  }
130 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
131 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
132 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
133 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
134 6b5a528c Florent Chuffart
  i = 1
135 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
136 7e2d37e1 Florent Chuffart
    lb = tf_outputs[i,]$lower_bound
137 7e2d37e1 Florent Chuffart
    ub = tf_outputs[i,]$upper_bound
138 7e2d37e1 Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
139 6b5a528c Florent Chuffart
    i = i+1
140 6b5a528c Florent Chuffart
  }
141 6b5a528c Florent Chuffart
  return(tf_outputs)
142 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
143 6b5a528c Florent Chuffart
}
144 6b5a528c Florent Chuffart
145 6b5a528c Florent Chuffart
146 6b5a528c Florent Chuffart
147 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
148 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
149 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
150 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
151 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
152 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
153 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
154 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
155 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
156 3761ede2 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
157 6b5a528c Florent Chuffart
) {
158 3761ede2 Florent Chuffart
	if (only_f) {
159 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
160 3761ede2 Florent Chuffart
	} else if (only_r) {
161 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
162 3761ede2 Florent Chuffart
	} else {
163 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
164 3761ede2 Florent Chuffart
	}
165 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
166 3761ede2 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
167 3761ede2 Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]    
168 b8a95426 Florent Chuffart
  }
169 3761ede2 Florent Chuffart
	return(inputs)
170 6b5a528c Florent Chuffart
### Returns filtred inputs.
171 6b5a528c Florent Chuffart
}
172 6b5a528c Florent Chuffart
173 6b5a528c Florent Chuffart
174 6b5a528c Florent Chuffart
175 3761ede2 Florent Chuffart
# filter_tf_inputs = function(# Filter TemplateFilter inputs
176 3761ede2 Florent Chuffart
# ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
177 3761ede2 Florent Chuffart
# inputs, ##<< TF inputs to be filtered.
178 3761ede2 Florent Chuffart
# chr, ##<< Chromosome observed, here chr is an integer.
179 3761ede2 Florent Chuffart
# x_min, ##<< Coordinate of the first bp observed.
180 3761ede2 Florent Chuffart
# x_max, ##<< Coordinate of the last bp observed.
181 3761ede2 Florent Chuffart
# nuc_width = 160, ##<< Nucleosome width.
182 3761ede2 Florent Chuffart
# only_f = FALSE, ##<< Filter only F reads.
183 3761ede2 Florent Chuffart
# only_r = FALSE, ##<< Filter only R reads.
184 3761ede2 Florent Chuffart
# filter_for_coverage = FALSE, ##<< Does it filter for plot coverage?
185 3761ede2 Florent Chuffart
# USE_DPLYR = FALSE ##<< Use dplyr lib to filter reads.
186 3761ede2 Florent Chuffart
# ) {
187 3761ede2 Florent Chuffart
#   n = names(inputs)
188 3761ede2 Florent Chuffart
#
189 3761ede2 Florent Chuffart
#   if (!USE_DPLYR) {
190 3761ede2 Florent Chuffart
#     if (only_f) {
191 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
192 3761ede2 Florent Chuffart
#     } else if (only_r) {
193 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
194 3761ede2 Florent Chuffart
#     } else {
195 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
196 3761ede2 Florent Chuffart
#     }
197 3761ede2 Florent Chuffart
#   } else {
198 3761ede2 Florent Chuffart
#     names(inputs) = c("chr", "pos", "str", "lev")
199 3761ede2 Florent Chuffart
#     if (only_f) {
200 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr,  pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width)
201 3761ede2 Florent Chuffart
#     } else if (only_r) {
202 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width)
203 3761ede2 Florent Chuffart
#     } else {
204 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width)
205 3761ede2 Florent Chuffart
#     }
206 3761ede2 Florent Chuffart
#       # if (!filter_for_coverage) {
207 3761ede2 Florent Chuffart
#       #   inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
208 3761ede2 Florent Chuffart
#       #   inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max)
209 3761ede2 Florent Chuffart
#       #   inputs$corrected_inputs_coords = NULL
210 3761ede2 Florent Chuffart
#       # }
211 3761ede2 Florent Chuffart
#   }
212 3761ede2 Florent Chuffart
#
213 3761ede2 Florent Chuffart
#   if (!filter_for_coverage) {
214 3761ede2 Florent Chuffart
#     corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
215 3761ede2 Florent Chuffart
#     inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
216 3761ede2 Florent Chuffart
#   }
217 3761ede2 Florent Chuffart
#
218 3761ede2 Florent Chuffart
#   names(inputs_out) = n
219 3761ede2 Florent Chuffart
#   return(inputs_out)
220 3761ede2 Florent Chuffart
# ### Returns filtred inputs.
221 3761ede2 Florent Chuffart
# }
222 3761ede2 Florent Chuffart
223 3761ede2 Florent Chuffart
224 3761ede2 Florent Chuffart
225 6b5a528c Florent Chuffart
get_comp_strand = function(
226 6b5a528c Florent Chuffart
### Compute the complementatry strand.
227 6b5a528c Florent Chuffart
strand ##<< The original strand.
228 6b5a528c Florent Chuffart
) {
229 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
230 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
231 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
232 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
233 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
234 6b5a528c Florent Chuffart
	})
235 6b5a528c Florent Chuffart
### Returns the complementatry strand.
236 6b5a528c Florent Chuffart
}
237 6b5a528c Florent Chuffart
238 6b5a528c Florent Chuffart
239 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
240 5badc2fd Florent Chuffart
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
241 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
242 e5603c3f Florent Chuffart
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
243 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
244 6b5a528c Florent Chuffart
){
245 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
246 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
247 6b5a528c Florent Chuffart
			return(TRUE)
248 6b5a528c Florent Chuffart
		}
249 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
250 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
251 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
252 6b5a528c Florent Chuffart
	        return(FALSE)
253 6b5a528c Florent Chuffart
	      }
254 6b5a528c Florent Chuffart
	  	}
255 6b5a528c Florent Chuffart
	  }
256 6b5a528c Florent Chuffart
	  return(TRUE)
257 6b5a528c Florent Chuffart
	}
258 d973538c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
259 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
260 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
261 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
262 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
263 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
264 6b5a528c Florent Chuffart
				# print(new_cluster)
265 6b5a528c Florent Chuffart
		  }
266 6b5a528c Florent Chuffart
		} else {
267 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
268 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
269 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
270 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
271 6b5a528c Florent Chuffart
			}
272 6b5a528c Florent Chuffart
		}
273 6b5a528c Florent Chuffart
		return(clusters)
274 6b5a528c Florent Chuffart
	}
275 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
276 7e2d37e1 Florent Chuffart
	llr_scores = c()
277 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
278 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
279 6b5a528c Florent Chuffart
  # compute clusters
280 6b5a528c Florent Chuffart
  clusters = list()
281 6b5a528c Florent Chuffart
  cluster_contents = list()
282 6b5a528c Florent Chuffart
  # Init reader
283 6b5a528c Florent Chuffart
  indexes = c()
284 6b5a528c Florent Chuffart
  track_readers = c()
285 6b5a528c Florent Chuffart
  current_nuc = NULL
286 7e2d37e1 Florent Chuffart
	llr_score = llr_thres + 1
287 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
288 6b5a528c Florent Chuffart
  tf_outs = list()
289 6b5a528c Florent Chuffart
	i = 1
290 6b5a528c Florent Chuffart
  for (sample in samples) {
291 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
292 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
293 6b5a528c Florent Chuffart
    indexes[i] = 1
294 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
295 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
296 6b5a528c Florent Chuffart
	  } else {
297 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
298 6b5a528c Florent Chuffart
		}
299 5bfac5a3 Florent Chuffart
		i = i+1
300 6b5a528c Florent Chuffart
  }
301 d973538c Florent Chuffart
  nb_tracks = length(tf_outs)
302 6b5a528c Florent Chuffart
	# print(track_readers)
303 6b5a528c Florent Chuffart
  new_cluster = NULL
304 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
305 6b5a528c Florent Chuffart
  nuc_from_track = c()
306 d973538c Florent Chuffart
  for (i in 1:nb_tracks){
307 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
308 6b5a528c Florent Chuffart
  }
309 6b5a528c Florent Chuffart
  # Start clustering
310 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
311 6b5a528c Florent Chuffart
    new_center = min(track_readers)
312 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
313 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
314 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
315 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
316 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
317 6b5a528c Florent Chuffart
		new_nuc$track = current_track
318 5bfac5a3 Florent Chuffart
319 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
320 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
321 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
322 6b5a528c Florent Chuffart
323 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
324 6b5a528c Florent Chuffart
325 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
326 7e2d37e1 Florent Chuffart
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
327 7e2d37e1 Florent Chuffart
			llr_scores = c(llr_scores,llr_score)
328 6b5a528c Florent Chuffart
		}
329 7e2d37e1 Florent Chuffart
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
330 7e2d37e1 Florent Chuffart
		if (is.na(llr_score)) {
331 7e2d37e1 Florent Chuffart
			llr_score = llr_thres + 1
332 6b5a528c Florent Chuffart
		}
333 7e2d37e1 Florent Chuffart
		# Store llr_score
334 7e2d37e1 Florent Chuffart
		new_nuc$llr_score = llr_score
335 7e2d37e1 Florent Chuffart
	  if (llr_score < llr_thres) {
336 6b5a528c Florent Chuffart
      # aggregate to current cluster
337 6b5a528c Florent Chuffart
      #   update bound
338 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
339 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
340 6b5a528c Florent Chuffart
      }
341 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
342 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
343 6b5a528c Florent Chuffart
      }
344 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
345 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
346 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
347 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
348 6b5a528c Florent Chuffart
    } else {
349 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
350 6b5a528c Florent Chuffart
        # store old cluster
351 d973538c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
352 6b5a528c Florent Chuffart
			}
353 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
354 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
355 6b5a528c Florent Chuffart
      nuc_from_track = c()
356 d973538c Florent Chuffart
      for (i in 1:nb_tracks){
357 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
358 6b5a528c Florent Chuffart
      }
359 55c1cdff Florent Chuffart
      # create new cluster
360 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
361 6b5a528c Florent Chuffart
      # update upper bound
362 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
363 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
364 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
365 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
366 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
367 5bfac5a3 Florent Chuffart
368 6b5a528c Florent Chuffart
		}
369 5bfac5a3 Florent Chuffart
370 6b5a528c Florent Chuffart
		current_nuc = new_nuc
371 6b5a528c Florent Chuffart
372 6b5a528c Florent Chuffart
    # update indexes
373 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
374 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
375 6b5a528c Florent Chuffart
      # update track
376 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
377 6b5a528c Florent Chuffart
    } else {
378 6b5a528c Florent Chuffart
      # update track
379 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
380 6b5a528c Florent Chuffart
    }
381 6b5a528c Florent Chuffart
  }
382 6b5a528c Florent Chuffart
  # store last cluster
383 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
384 6b5a528c Florent Chuffart
    # store old cluster
385 d973538c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
386 6b5a528c Florent Chuffart
  }
387 7e2d37e1 Florent Chuffart
	return(list(clusters, llr_scores))
388 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
389 5bfac5a3 Florent Chuffart
}, ex=function(){
390 6b5a528c Florent Chuffart
	# Dealing with a region of interest
391 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
392 6b5a528c Florent Chuffart
	samples = list()
393 6b5a528c Florent Chuffart
	for (i in 1:3) {
394 6b5a528c Florent Chuffart
		# Create TF output
395 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
396 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
397 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
398 6b5a528c Florent Chuffart
		# Generate corresponding reads
399 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
400 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
401 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
402 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
403 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
404 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
405 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
406 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
407 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
408 55c1cdff Florent Chuffart
														 "V3" = strands,
409 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
410 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
411 6b5a528c Florent Chuffart
	}
412 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
413 6b5a528c Florent Chuffart
})
414 6b5a528c Florent Chuffart
415 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
416 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
417 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
418 d973538c Florent Chuffart
cur_index, ##<< the index of the roi involved
419 d973538c Florent Chuffart
nb_tracks=3 ##<< the number of replicates
420 7646593d Florent Chuffart
) {
421 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
422 4016229d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
423 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
424 7646593d Florent Chuffart
	} else {
425 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
426 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
427 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
428 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
429 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
430 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
431 4016229d Florent Chuffart
			tmp_nuc_as_list[["cur_index"]] = cur_index
432 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
433 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
434 7646593d Florent Chuffart
			all_original_reads = c()
435 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
436 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
437 7646593d Florent Chuffart
			}
438 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
439 0d5fbf02 Florent Chuffart
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
440 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
441 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
442 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5)          
443 d973538c Florent Chuffart
        }
444 7646593d Florent Chuffart
			} else {
445 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
446 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA
447 d973538c Florent Chuffart
        }
448 7646593d Florent Chuffart
			}
449 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
450 7646593d Florent Chuffart
    })
451 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
452 7646593d Florent Chuffart
	}
453 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
454 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
455 7646593d Florent Chuffart
}
456 6b5a528c Florent Chuffart
457 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
458 e5603c3f Florent Chuffart
### This function aligns nucleosomes between two strains for a given genome region.
459 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
460 e5603c3f Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
461 e5603c3f Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
462 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
463 e5603c3f Florent Chuffart
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
464 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
465 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
466 6b5a528c Florent Chuffart
) {
467 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
468 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
469 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
470 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
471 6b5a528c Florent Chuffart
	}
472 6b5a528c Florent Chuffart
	common_nuc = NULL
473 7e2d37e1 Florent Chuffart
	llr_scores = c()
474 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
475 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
476 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
477 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
478 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
479 4016229d Florent Chuffart
	big_cur = replicates[[1]][[1]]$roi
480 4016229d Florent Chuffart
  orig_big_cur = replicates[[1]][[1]]$orig_roi
481 4016229d Florent Chuffart
	if(big_cur$end - big_cur$begin < 0) {
482 4016229d Florent Chuffart
		tmp_begin = big_cur$begin
483 4016229d Florent Chuffart
		big_cur$begin =  big_cur$end
484 4016229d Florent Chuffart
		big_cur$end =  tmp_begin
485 6b5a528c Florent Chuffart
	}
486 6b5a528c Florent Chuffart
	# GO!
487 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
488 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
489 6b5a528c Florent Chuffart
	}
490 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
491 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
492 6b5a528c Florent Chuffart
  }
493 6b5a528c Florent Chuffart
	lws = c()
494 6b5a528c Florent Chuffart
	ups = c()
495 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
496 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
497 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
498 6b5a528c Florent Chuffart
	}
499 6b5a528c Florent Chuffart
500 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
501 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
502 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
503 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
504 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
505 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
506 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
507 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
508 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
509 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
510 4016229d Florent Chuffart
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
511 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
512 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
513 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
514 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
515 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
516 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
517 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
518 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
519 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
520 4016229d Florent Chuffart
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
521 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
522 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
523 6b5a528c Florent Chuffart
							SKIP = FALSE
524 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
525 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
526 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
527 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
528 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
529 6b5a528c Florent Chuffart
									SKIP = TRUE
530 6b5a528c Florent Chuffart
								}
531 6b5a528c Florent Chuffart
							}
532 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
533 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
534 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
535 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
536 6b5a528c Florent Chuffart
									SKIP = TRUE
537 6b5a528c Florent Chuffart
								}
538 6b5a528c Florent Chuffart
							}
539 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
540 6b5a528c Florent Chuffart
							if (!SKIP) {
541 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
542 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
543 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
544 7e2d37e1 Florent Chuffart
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
545 7e2d37e1 Florent Chuffart
								llr_scores = c(llr_scores, llr_score)
546 e5603c3f Florent Chuffart
								# Filtering on LLR Score
547 67157457 Florent Chuffart
                if (llr_score < llr_thres) {
548 6b5a528c Florent Chuffart
									tmp_nuc = list()
549 6b5a528c Florent Chuffart
									# strain_ref1
550 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
551 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
552 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
553 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
554 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
555 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
556 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
557 6b5a528c Florent Chuffart
									# strain_ref2
558 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
559 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
560 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
561 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
562 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
563 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
564 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
565 6b5a528c Florent Chuffart
									# common
566 7e2d37e1 Florent Chuffart
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
567 dadb6a4d Florent Chuffart
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
568 dadb6a4d Florent Chuffart
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
569 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
570 ec2936ea Florent Chuffart
                }
571 6b5a528c Florent Chuffart
							}
572 6b5a528c Florent Chuffart
						}
573 5bfac5a3 Florent Chuffart
					}
574 6b5a528c Florent Chuffart
				} else {
575 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
576 6b5a528c Florent Chuffart
			  }
577 6b5a528c Florent Chuffart
		  }
578 6b5a528c Florent Chuffart
		}
579 5bfac5a3 Florent Chuffart
580 67157457 Florent Chuffart
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
581 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
582 67157457 Florent Chuffart
      to_remove_list = c()
583 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
584 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
585 67157457 Florent Chuffart
          to_remove = index_redundant[i]
586 67157457 Florent Chuffart
        }   else {
587 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
588 67157457 Florent Chuffart
        }
589 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
590 67157457 Florent Chuffart
      }
591 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
592 67157457 Florent Chuffart
    }
593 5bfac5a3 Florent Chuffart
594 67157457 Florent Chuffart
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
595 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
596 67157457 Florent Chuffart
      to_remove_list = c()
597 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
598 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
599 67157457 Florent Chuffart
          to_remove = index_redundant[i]
600 67157457 Florent Chuffart
        }   else {
601 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
602 67157457 Florent Chuffart
        }
603 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
604 67157457 Florent Chuffart
      }
605 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
606 67157457 Florent Chuffart
    }
607 5bfac5a3 Florent Chuffart
608 7e2d37e1 Florent Chuffart
		return(list(common_nuc, llr_scores))
609 6b5a528c Florent Chuffart
	} else {
610 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
611 6b5a528c Florent Chuffart
		return(NULL)
612 6b5a528c Florent Chuffart
	}
613 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
614 5bfac5a3 Florent Chuffart
}, ex=function(){
615 1d833b97 Florent Chuffart
616 4016229d Florent Chuffart
    # Define new translate_cur function...
617 4016229d Florent Chuffart
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
618 1d833b97 Florent Chuffart
      return(roi)
619 1d833b97 Florent Chuffart
    }
620 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
621 4016229d Florent Chuffart
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
622 4016229d Florent Chuffart
    unlockBinding("translate_cur", getNamespace("nucleominer"))
623 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, "package:nucleominer")
624 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
625 4016229d Florent Chuffart
    lockBinding("translate_cur", getNamespace("nucleominer"))
626 4016229d Florent Chuffart
    lockBinding("translate_cur", as.environment("package:nucleominer"))
627 1d833b97 Florent Chuffart
628 6b5a528c Florent Chuffart
	# Dealing with a region of interest
629 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
630 4016229d Florent Chuffart
	roi2 = translate_cur(roi, roi$strain_ref1)
631 6b5a528c Florent Chuffart
	replicates = list()
632 6b5a528c Florent Chuffart
	for (j in 1:2) {
633 6b5a528c Florent Chuffart
		samples = list()
634 6b5a528c Florent Chuffart
		for (i in 1:3) {
635 6b5a528c Florent Chuffart
			# Create TF output
636 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
637 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
638 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
639 6b5a528c Florent Chuffart
			# Generate corresponding reads
640 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
641 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
642 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
643 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
644 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
645 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
646 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
647 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
648 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
649 55c1cdff Florent Chuffart
															 "V3" = strands,
650 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
651 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
652 6b5a528c Florent Chuffart
		}
653 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
654 6b5a528c Florent Chuffart
	}
655 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
656 6b5a528c Florent Chuffart
})
657 6b5a528c Florent Chuffart
658 6b5a528c Florent Chuffart
659 6b5a528c Florent Chuffart
660 6b5a528c Florent Chuffart
661 6b5a528c Florent Chuffart
662 6b5a528c Florent Chuffart
663 6b5a528c Florent Chuffart
664 6b5a528c Florent Chuffart
665 6b5a528c Florent Chuffart
666 6b5a528c Florent Chuffart
667 6b5a528c Florent Chuffart
668 6b5a528c Florent Chuffart
669 6b5a528c Florent Chuffart
670 6b5a528c Florent Chuffart
671 6b5a528c Florent Chuffart
672 6b5a528c Florent Chuffart
673 6b5a528c Florent Chuffart
674 6b5a528c Florent Chuffart
675 6b5a528c Florent Chuffart
676 6b5a528c Florent Chuffart
677 6b5a528c Florent Chuffart
678 6b5a528c Florent Chuffart
679 6b5a528c Florent Chuffart
680 6b5a528c Florent Chuffart
681 6b5a528c Florent Chuffart
682 6b5a528c Florent Chuffart
683 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
684 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
685 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
686 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
687 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
688 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
689 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
690 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
691 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
692 6b5a528c Florent Chuffart
) {
693 6b5a528c Florent Chuffart
	samples=list()
694 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
695 6b5a528c Florent Chuffart
	for (i in samples_ids) {
696 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
697 a0b91fee Florent Chuffart
    sample$orig_roi = roi
698 4016229d Florent Chuffart
    sample$roi = translate_cur(roi, sample$strain, config = config)
699 6b5a528c Florent Chuffart
		if (get_genome) {
700 6b5a528c Florent Chuffart
			# Get Genome
701 5bfac5a3 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
702 6b5a528c Florent Chuffart
		}
703 6b5a528c Florent Chuffart
		# Get inputs
704 55c1cdff Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
705 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
706 6b5a528c Florent Chuffart
		if (!only_fetch) {
707 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
708 6b5a528c Florent Chuffart
	  }
709 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
710 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
711 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
712 6b5a528c Florent Chuffart
			if (!only_fetch) {
713 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
714 6b5a528c Florent Chuffart
  		}
715 6b5a528c Florent Chuffart
		}
716 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
717 6b5a528c Florent Chuffart
	}
718 6b5a528c Florent Chuffart
  return(samples)
719 6b5a528c Florent Chuffart
}
720 6b5a528c Florent Chuffart
721 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
722 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
723 55c1cdff Florent Chuffart
region1, ##<< Original regions.
724 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
725 6b5a528c Florent Chuffart
) {
726 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
727 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
728 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
729 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
730 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
731 6b5a528c Florent Chuffart
      ret = c()
732 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
733 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
734 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
735 6b5a528c Florent Chuffart
          # remove cur_fuzzy
736 6b5a528c Florent Chuffart
          ret = c()
737 6b5a528c Florent Chuffart
          break
738 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
739 55c1cdff Florent Chuffart
          # crop fuzzy
740 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
741 6b5a528c Florent Chuffart
          ret = cur_fuzzy
742 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
743 55c1cdff Florent Chuffart
          # crop fuzzy
744 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
745 6b5a528c Florent Chuffart
          ret = cur_fuzzy
746 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
747 6b5a528c Florent Chuffart
          # split fuzzy
748 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
749 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
750 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
751 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
752 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
753 6b5a528c Florent Chuffart
          # print(ret)
754 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
755 6b5a528c Florent Chuffart
          break
756 6b5a528c Florent Chuffart
        } else {
757 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
758 55c1cdff Florent Chuffart
        }
759 6b5a528c Florent Chuffart
      }
760 6b5a528c Florent Chuffart
      return(ret)
761 6b5a528c Florent Chuffart
    } else {
762 6b5a528c Florent Chuffart
      return(cur_fuzzy)
763 6b5a528c Florent Chuffart
    }
764 6b5a528c Florent Chuffart
  })
765 6b5a528c Florent Chuffart
  }
766 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
767 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
768 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
769 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
770 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
771 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
772 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
773 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
774 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
775 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
776 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
777 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
778 6b5a528c Florent Chuffart
}
779 6b5a528c Florent Chuffart
780 e5603c3f Florent Chuffart
union_regions = function(# Aggregate regions that intersect themselves.
781 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
782 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
783 6b5a528c Florent Chuffart
) {
784 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
785 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
786 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
787 6b5a528c Florent Chuffart
  new_length = 0
788 55c1cdff Florent Chuffart
789 6b5a528c Florent Chuffart
  while (old_length != new_length) {
790 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
791 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
792 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
793 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
794 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
795 55c1cdff Florent Chuffart
    }
796 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
797 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
798 6b5a528c Florent Chuffart
    }
799 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
800 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
801 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
802 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
803 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
804 4016229d Florent Chuffart
      cur_index = regions$cur_index[vec_beg_1[i]]
805 4016229d Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
806 6b5a528c Florent Chuffart
      })
807 59ad95ca Florent Chuffart
    union = collapse_regions(union)
808 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
809 6b5a528c Florent Chuffart
    new_length = length(union[,1])
810 6b5a528c Florent Chuffart
    regions = union
811 6b5a528c Florent Chuffart
  }
812 6b5a528c Florent Chuffart
  return(union)
813 6b5a528c Florent Chuffart
}
814 6b5a528c Florent Chuffart
815 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
816 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
817 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
818 4016229d Florent Chuffart
# cur_index, ##<< The region of interest index.
819 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
820 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
821 0303dbe8 Florent Chuffart
# ){
822 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
823 4016229d Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
824 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
825 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
826 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
827 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
828 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
829 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
830 0303dbe8 Florent Chuffart
# }
831 6b5a528c Florent Chuffart
832 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
833 e5603c3f Florent Chuffart
### This function is an elaborated call to translate_cur.
834 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
835 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
836 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
837 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
838 6b5a528c Florent Chuffart
roi ##<< The region of interest.
839 6b5a528c Florent Chuffart
) {
840 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
841 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
842 4016229d Florent Chuffart
    big_cur =  roi
843 4016229d Florent Chuffart
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
844 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
845 5bfac5a3 Florent Chuffart
      return(NULL)
846 5bfac5a3 Florent Chuffart
    } else {
847 4016229d Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
848 5bfac5a3 Florent Chuffart
    }
849 5bfac5a3 Florent Chuffart
  })
850 5bfac5a3 Florent Chuffart
851 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
852 59ad95ca Florent Chuffart
}
853 59ad95ca Florent Chuffart
854 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
855 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
856 59ad95ca Florent Chuffart
regions ##< a list of regions
857 59ad95ca Florent Chuffart
) {
858 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
859 5bfac5a3 Florent Chuffart
    return(NULL)
860 5bfac5a3 Florent Chuffart
  } else {
861 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
862 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
863 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
864 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
865 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
866 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
867 5bfac5a3 Florent Chuffart
    return(regions)
868 5bfac5a3 Florent Chuffart
  }
869 6b5a528c Florent Chuffart
}
870 6b5a528c Florent Chuffart
871 6b5a528c Florent Chuffart
872 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
873 4016229d Florent Chuffart
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
874 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
875 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
876 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
877 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
878 6b5a528c Florent Chuffart
) {
879 4016229d Florent Chuffart
  tr_roi = translate_cur(roi, strain, config = config)
880 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
881 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
882 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
883 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
884 6b5a528c Florent Chuffart
  }
885 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
886 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
887 6b5a528c Florent Chuffart
  }
888 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
889 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
890 6b5a528c Florent Chuffart
  }
891 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
892 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
893 6b5a528c Florent Chuffart
  }
894 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
895 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
896 6b5a528c Florent Chuffart
}
897 6b5a528c Florent Chuffart
898 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
899 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
900 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
901 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
902 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
903 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
904 6b5a528c Florent Chuffart
) {
905 6b5a528c Florent Chuffart
	all_reads = NULL
906 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
907 4016229d Florent Chuffart
    if (form == "unr") {
908 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
909 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
910 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
911 6b5a528c Florent Chuffart
      tmp_res$form = form
912 6b5a528c Florent Chuffart
    } else if (form == "wp") {
913 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
914 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
915 6b5a528c Florent Chuffart
      tmp_res$form = form
916 4016229d Florent Chuffart
    } else if (form == "wpunr") {
917 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
918 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
919 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
920 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
921 5bfac5a3 Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
922 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
923 4016229d Florent Chuffart
      tmp_res2$form = "unr"
924 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
925 6b5a528c Florent Chuffart
    }
926 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
927 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
928 55c1cdff Florent Chuffart
		}
929 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
930 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
931 6b5a528c Florent Chuffart
  }
932 5bfac5a3 Florent Chuffart
  return(all_reads)
933 6b5a528c Florent Chuffart
}
934 6b5a528c Florent Chuffart
935 e5603c3f Florent Chuffart
get_design = function(# Build the design for DESeq
936 6b5a528c Florent Chuffart
### This function build the design according sample properties.
937 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
938 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
939 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
940 6b5a528c Florent Chuffart
) {
941 6b5a528c Florent Chuffart
  off1 = 0
942 6b5a528c Florent Chuffart
  off2 = 0
943 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
944 6b5a528c Florent Chuffart
	design_rownames = c()
945 6b5a528c Florent Chuffart
	design_manip = c()
946 6b5a528c Florent Chuffart
	design_strain = c()
947 5bfac5a3 Florent Chuffart
  off2index = function(off) {
948 55c1cdff Florent Chuffart
  	switch(toString(off),
949 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
950 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
951 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
952 6b5a528c Florent Chuffart
  		c(1,1,1)
953 5bfac5a3 Florent Chuffart
  		)
954 6b5a528c Florent Chuffart
  }
955 6b5a528c Florent Chuffart
	for (manip in manips) {
956 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
957 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
958 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
959 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
960 6b5a528c Florent Chuffart
		} else {
961 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
962 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
963 6b5a528c Florent Chuffart
			}
964 6b5a528c Florent Chuffart
		}
965 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
966 5bfac5a3 Florent Chuffart
		for (strain in combi) {
967 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
968 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
969 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
970 6b5a528c Florent Chuffart
		}
971 6b5a528c Florent Chuffart
	}
972 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
973 5bfac5a3 Florent Chuffart
	return(snep_design)
974 6b5a528c Florent Chuffart
}
975 6b5a528c Florent Chuffart
976 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
977 e5603c3f Florent Chuffart
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
978 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
979 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
980 6b5a528c Florent Chuffart
res, ##<< Data
981 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
982 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
983 5bfac5a3 Florent Chuffart
) {
984 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
985 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
986 6b5a528c Florent Chuffart
	snepDesign = data.frame(
987 6b5a528c Florent Chuffart
		row.names = cols,
988 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
989 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
990 6b5a528c Florent Chuffart
		)
991 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
992 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
993 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
994 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
995 6b5a528c Florent Chuffart
	if (NEWPLOT) {
996 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
997 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
998 6b5a528c Florent Chuffart
	}
999 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1000 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1001 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1002 6b5a528c Florent Chuffart
	}
1003 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1004 6b5a528c Florent Chuffart
}
1005 6b5a528c Florent Chuffart
1006 e5603c3f Florent Chuffart
analyse_design = function(# Launch DESeq methods.
1007 e5603c3f Florent Chuffart
### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
1008 e5603c3f Florent Chuffart
snep_design, ##<< The design to consider.
1009 e5603c3f Florent Chuffart
reads ##<< The data to consider.
1010 6b5a528c Florent Chuffart
) {
1011 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1012 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1013 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1014 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1015 6b5a528c Florent Chuffart
1016 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1017 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1018 6b5a528c Florent Chuffart
1019 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1020 5bfac5a3 Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))
1021 6b5a528c Florent Chuffart
}
1022 6b5a528c Florent Chuffart
1023 6b5a528c Florent Chuffart
1024 6b5a528c Florent Chuffart
1025 6b5a528c Florent Chuffart
1026 6b5a528c Florent Chuffart
1027 6b5a528c Florent Chuffart
1028 6b5a528c Florent Chuffart
1029 6b5a528c Florent Chuffart
1030 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1031 55c1cdff Florent Chuffart
### This function uses
1032 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1033 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1034 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1035 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1036 e5603c3f Florent Chuffart
FDR = 0.0001, ## the specific False Discover Rate
1037 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1038 6b5a528c Florent Chuffart
) {
1039 6b5a528c Florent Chuffart
  # PRETREAT
1040 4016229d Florent Chuffart
  snep_design = get_design(marker, combi, all_samples)
1041 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1042 6b5a528c Florent Chuffart
  # RUN ANALYSE
1043 4016229d Florent Chuffart
  tmp_analyse = analyse_design(snep_design, reads)
1044 6b5a528c Florent Chuffart
  # RESULTS
1045 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1046 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1047 6b5a528c Florent Chuffart
  k = names(fit1)
1048 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1049 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1050 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1051 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1052 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1053 4016229d Florent Chuffart
  # print(snep_design)
1054 e5603c3f Florent Chuffart
	thres = FDR(reads$pvalsGLM, FDR)
1055 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1056 5badc2fd Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
1057 6b5a528c Florent Chuffart
  return(reads)
1058 5bfac5a3 Florent Chuffart
  },  ex=function(){
1059 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1060 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1061 4016229d Florent Chuffart
    form = "wpunr" # "wp" | "unr" | "wpunr"
1062 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1063 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1064 6b5a528c Florent Chuffart
})
1065 6b5a528c Florent Chuffart
1066 6b5a528c Florent Chuffart
1067 6b5a528c Florent Chuffart
1068 6b5a528c Florent Chuffart
1069 6b5a528c Florent Chuffart
1070 6b5a528c Florent Chuffart
1071 6b5a528c Florent Chuffart
1072 6b5a528c Florent Chuffart
1073 6b5a528c Florent Chuffart
1074 6b5a528c Florent Chuffart
1075 6b5a528c Florent Chuffart
1076 6b5a528c Florent Chuffart
1077 6b5a528c Florent Chuffart
1078 6b5a528c Florent Chuffart
1079 6b5a528c Florent Chuffart
1080 6b5a528c Florent Chuffart
1081 6b5a528c Florent Chuffart
1082 6b5a528c Florent Chuffart
1083 6b5a528c Florent Chuffart
1084 6b5a528c Florent Chuffart
1085 6b5a528c Florent Chuffart
1086 6b5a528c Florent Chuffart
1087 6b5a528c Florent Chuffart
1088 6b5a528c Florent Chuffart
1089 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1090 e5603c3f Florent Chuffart
### Utility to convert Roman numbers into Arabic numbers
1091 1d833b97 Florent Chuffart
){list(
1092 1d833b97 Florent Chuffart
  "I" = 1,
1093 1d833b97 Florent Chuffart
  "II" = 2,
1094 1d833b97 Florent Chuffart
  "III" = 3,
1095 1d833b97 Florent Chuffart
  "IV" = 4,
1096 1d833b97 Florent Chuffart
  "V" = 5,
1097 1d833b97 Florent Chuffart
  "VI" = 6,
1098 1d833b97 Florent Chuffart
  "VII" = 7,
1099 1d833b97 Florent Chuffart
  "VIII" = 8,
1100 1d833b97 Florent Chuffart
  "IX" = 9,
1101 1d833b97 Florent Chuffart
  "X" = 10,
1102 1d833b97 Florent Chuffart
  "XI" = 11,
1103 1d833b97 Florent Chuffart
  "XII" = 12,
1104 1d833b97 Florent Chuffart
  "XIII" = 13,
1105 1d833b97 Florent Chuffart
  "XIV" = 14,
1106 1d833b97 Florent Chuffart
  "XV" = 15,
1107 1d833b97 Florent Chuffart
  "XVI" = 16,
1108 1d833b97 Florent Chuffart
  "XVII" = 17,
1109 1d833b97 Florent Chuffart
  "XVIII" = 18,
1110 1d833b97 Florent Chuffart
  "XIX" = 19,
1111 1d833b97 Florent Chuffart
  "XX" = 20
1112 1d833b97 Florent Chuffart
)}
1113 1d833b97 Florent Chuffart
1114 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1115 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1116 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1117 1d833b97 Florent Chuffart
) {
1118 1d833b97 Florent Chuffart
	ret = list()
1119 1d833b97 Florent Chuffart
	for (name in names(l)) {
1120 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1121 1d833b97 Florent Chuffart
	}
1122 1d833b97 Florent Chuffart
	ret
1123 1d833b97 Florent Chuffart
### The switched pairlist.
1124 1d833b97 Florent Chuffart
}, ex=function(){
1125 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1126 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1127 1d833b97 Florent Chuffart
})
1128 1d833b97 Florent Chuffart
1129 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1130 e5603c3f Florent Chuffart
### Utility to convert Arabic numbers to Roman numbers
1131 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1132 1d833b97 Florent Chuffart
1133 1d833b97 Florent Chuffart
1134 1d833b97 Florent Chuffart
1135 1d833b97 Florent Chuffart
1136 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1137 e5603c3f Florent Chuffart
### This fonction allows to access to a specific part of the c2c file.
1138 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1139 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1140 e5603c3f Florent Chuffart
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1141 e5603c3f Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
1142 e5603c3f Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
1143 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1144 1d833b97 Florent Chuffart
) {
1145 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1146 1d833b97 Florent Chuffart
	# Launch c2c file
1147 1d833b97 Florent Chuffart
	if (reverse) {
1148 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1149 1d833b97 Florent Chuffart
	} else {
1150 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1151 1d833b97 Florent Chuffart
	}
1152 5bfac5a3 Florent Chuffart
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1153 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1154 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1155 1d833b97 Florent Chuffart
	# Reverse
1156 1d833b97 Florent Chuffart
	if (reverse) {
1157 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1158 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1159 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1160 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1161 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1162 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1163 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1164 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1165 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1166 1d833b97 Florent Chuffart
	}
1167 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1168 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1169 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1170 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1171 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1172 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1173 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1174 5bfac5a3 Florent Chuffart
  	}
1175 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1176 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1177 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1178 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1179 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1180 5bfac5a3 Florent Chuffart
    }
1181 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1182 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1183 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1184 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1185 5bfac5a3 Florent Chuffart
    }
1186 5bfac5a3 Florent Chuffart
  }
1187 5bfac5a3 Florent Chuffart
  return(c2c)
1188 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1189 5bfac5a3 Florent Chuffart
}
1190 5bfac5a3 Florent Chuffart
1191 4016229d Florent Chuffart
translate_cur = structure(function(# Translate coords of a genome region.
1192 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1193 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1194 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1195 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1196 4016229d Florent Chuffart
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1197 5bfac5a3 Florent Chuffart
) {
1198 d973538c Florent Chuffart
	strain1 = roi$strain_ref  
1199 d973538c Florent Chuffart
  # Do something or nothing?
1200 d973538c Florent Chuffart
  if (is.null(config$NEUTRAL_TRANSLATE_CUR)) {
1201 d973538c Florent Chuffart
    config$NEUTRAL_TRANSLATE_CUR = FALSE
1202 d973538c Florent Chuffart
  }
1203 d973538c Florent Chuffart
	if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) {
1204 d973538c Florent Chuffart
    roi$strain_ref = strain2
1205 d973538c Florent Chuffart
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin)
1206 5bfac5a3 Florent Chuffart
		return(roi)
1207 5bfac5a3 Florent Chuffart
	}
1208 d973538c Florent Chuffart
  
1209 5bfac5a3 Florent Chuffart
	# Extract c2c file
1210 4016229d Florent Chuffart
	if (!is.null(big_cur)) {
1211 4016229d Florent Chuffart
  	# Dealing with big_cur
1212 4016229d Florent Chuffart
		if (roi$strain_ref != big_cur$strain_ref) {
1213 4016229d Florent Chuffart
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1214 62d69d53 Florent Chuffart
    }
1215 4016229d Florent Chuffart
    if (big_cur$end < big_cur$begin) {
1216 4016229d Florent Chuffart
      tmp_var = big_cur$begin
1217 4016229d Florent Chuffart
      big_cur$begin = big_cur$end
1218 4016229d Florent Chuffart
      big_cur$end = tmp_var
1219 4016229d Florent Chuffart
      big_cur$length = big_cur$end - big_cur$begin + 1
1220 55c1cdff Florent Chuffart
    }
1221 4016229d Florent Chuffart
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1222 4016229d Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1223 ded736c8 Florent Chuffart
      return(NULL)
1224 ded736c8 Florent Chuffart
    }
1225 4016229d Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1226 5bfac5a3 Florent Chuffart
  } else {
1227 4016229d Florent Chuffart
    # No big_cur
1228 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1229 5bfac5a3 Florent Chuffart
  }
1230 5bfac5a3 Florent Chuffart
  
1231 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1232 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1233 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1234 1d833b97 Florent Chuffart
  end_1 = roi$end
1235 5bfac5a3 Florent Chuffart
  if (reverse) {
1236 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1237 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1238 1d833b97 Florent Chuffart
	} else {
1239 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1240 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1241 1d833b97 Florent Chuffart
	}
1242 5bfac5a3 Florent Chuffart
1243 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1244 1d833b97 Florent Chuffart
	{
1245 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1246 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1247 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1248 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1249 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1250 54cc8283 Florent Chuffart
      } else {
1251 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1252 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1253 54cc8283 Florent Chuffart
      }
1254 1d833b97 Florent Chuffart
			if (reverse) {
1255 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1256 1d833b97 Florent Chuffart
			} else {
1257 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1258 1d833b97 Florent Chuffart
			}
1259 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1260 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1261 1d833b97 Florent Chuffart
					return(NULL)
1262 4016229d Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1263 55c1cdff Florent Chuffart
				} else {
1264 1d833b97 Florent Chuffart
					print(tmptransfostart)
1265 1d833b97 Florent Chuffart
					print(tmptransfostop)
1266 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1267 1d833b97 Florent Chuffart
				}
1268 1d833b97 Florent Chuffart
			}
1269 55c1cdff Florent Chuffart
		}
1270 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1271 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1272 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1273 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1274 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1275 54cc8283 Florent Chuffart
      } else {
1276 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1277 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1278 54cc8283 Florent Chuffart
      }
1279 1d833b97 Florent Chuffart
			if (reverse) {
1280 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1281 1d833b97 Florent Chuffart
			} else {
1282 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1283 1d833b97 Florent Chuffart
			}
1284 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1285 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1286 1d833b97 Florent Chuffart
					return(NULL)
1287 4016229d Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1288 1d833b97 Florent Chuffart
				} else {
1289 1d833b97 Florent Chuffart
					print(tmptransfostart)
1290 1d833b97 Florent Chuffart
					print(tmptransfostop)
1291 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1292 55c1cdff Florent Chuffart
				}
1293 55c1cdff Florent Chuffart
			}
1294 1d833b97 Florent Chuffart
		}
1295 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1296 1d833b97 Florent Chuffart
			# print("many start")
1297 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1298 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1299 1d833b97 Florent Chuffart
				print(tmptransfostart)
1300 1d833b97 Florent Chuffart
				print(tmptransfostop)
1301 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1302 1d833b97 Florent Chuffart
			}
1303 1d833b97 Florent Chuffart
		}
1304 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1305 1d833b97 Florent Chuffart
			# print("many stop")
1306 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1307 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1308 1d833b97 Florent Chuffart
				print(tmptransfostart)
1309 1d833b97 Florent Chuffart
				print(tmptransfostop)
1310 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1311 1d833b97 Florent Chuffart
			}
1312 5bfac5a3 Florent Chuffart
		}
1313 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1314 1d833b97 Florent Chuffart
			print(tmptransfostart)
1315 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1316 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1317 1d833b97 Florent Chuffart
		}
1318 55c1cdff Florent Chuffart
	}
1319 5bfac5a3 Florent Chuffart
  
1320 1d833b97 Florent Chuffart
  # Deal with strand
1321 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1322 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1323 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1324 1d833b97 Florent Chuffart
  } else {
1325 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1326 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1327 1d833b97 Florent Chuffart
  }
1328 1d833b97 Florent Chuffart
	# Build returned roi
1329 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1330 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1331 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1332 1d833b97 Florent Chuffart
	} else {
1333 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1334 1d833b97 Florent Chuffart
	}
1335 1d833b97 Florent Chuffart
  roi$begin = begin_2
1336 1d833b97 Florent Chuffart
  roi$end = end_2
1337 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1338 5bfac5a3 Florent Chuffart
		roi$length = 1
1339 5bfac5a3 Florent Chuffart
	} else {
1340 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1341 1d833b97 Florent Chuffart
	}
1342 55c1cdff Florent Chuffart
  return(roi)
1343 1d833b97 Florent Chuffart
}, ex=function(){
1344 4016229d Florent Chuffart
	# Define new translate_cur function...
1345 4016229d Florent Chuffart
	translate_cur = function(roi, strain2, config) {
1346 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1347 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1348 1d833b97 Florent Chuffart
			return(roi)
1349 1d833b97 Florent Chuffart
		} else {
1350 4016229d Florent Chuffart
		  stop("Here is my new translate_cur function...")
1351 5bfac5a3 Florent Chuffart
		}
1352 1d833b97 Florent Chuffart
	}
1353 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1354 4016229d Florent Chuffart
	# unlockBinding("translate_cur", as.environment("package:nm"))
1355 4016229d Florent Chuffart
	# unlockBinding("translate_cur", getNamespace("nm"))
1356 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, "package:nm")
1357 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1358 4016229d Florent Chuffart
	# lockBinding("translate_cur", getNamespace("nm"))
1359 4016229d Florent Chuffart
	# lockBinding("translate_cur", as.environment("package:nm"))
1360 1d833b97 Florent Chuffart
})
1361 1d833b97 Florent Chuffart
1362 1d833b97 Florent Chuffart
1363 5bfac5a3 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1364 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1365 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1366 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1367 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1368 5bfac5a3 Florent Chuffart
) {
1369 1d833b97 Florent Chuffart
1370 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1371 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1372 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1373 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1374 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1375 5bfac5a3 Florent Chuffart
    } else {
1376 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1377 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1378 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1379 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1380 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1381 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1382 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1383 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1384 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1385 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1386 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1387 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1388 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1389 5bfac5a3 Florent Chuffart
      }
1390 5bfac5a3 Florent Chuffart
      return(NULL)
1391 5bfac5a3 Florent Chuffart
    }
1392 5bfac5a3 Florent Chuffart
  }
1393 1d833b97 Florent Chuffart
1394 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1395 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1396 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1397 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1398 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1399 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1400 5bfac5a3 Florent Chuffart
  	}
1401 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1402 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1403 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1404 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1405 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1406 5bfac5a3 Florent Chuffart
  	}
1407 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1408 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1409 5bfac5a3 Florent Chuffart
  	}
1410 5bfac5a3 Florent Chuffart
  }
1411 1d833b97 Florent Chuffart
1412 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1413 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1414 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1415 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1416 5bfac5a3 Florent Chuffart
      if (!ok) {
1417 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1418 5bfac5a3 Florent Chuffart
      }
1419 5bfac5a3 Florent Chuffart
      return (ok)
1420 5bfac5a3 Florent Chuffart
    }
1421 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1422 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1423 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1424 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1425 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1426 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1427 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1428 5bfac5a3 Florent Chuffart
        return(roi)
1429 5bfac5a3 Florent Chuffart
      })
1430 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1431 5bfac5a3 Florent Chuffart
    }
1432 5bfac5a3 Florent Chuffart
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1433 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1434 5bfac5a3 Florent Chuffart
    STOP = FALSE
1435 5bfac5a3 Florent Chuffart
    nb_round = 0
1436 5bfac5a3 Florent Chuffart
    while(!STOP) {
1437 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1438 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1439 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1440 4016229d Florent Chuffart
      fooyjm = translate_curs(foo, "YJM", config=config)
1441 4016229d Florent Chuffart
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1442 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1443 5bfac5a3 Florent Chuffart
        print("case 1")
1444 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1445 5bfac5a3 Florent Chuffart
    		next
1446 5bfac5a3 Florent Chuffart
      }
1447 4016229d Florent Chuffart
      foorm = translate_curs(foo, "RM", config=config)
1448 4016229d Florent Chuffart
      foormby = translate_curs(foorm, "BY", config=config)
1449 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1450 5bfac5a3 Florent Chuffart
        print("case 2")
1451 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1452 5bfac5a3 Florent Chuffart
    		next
1453 5bfac5a3 Florent Chuffart
      }
1454 4016229d Florent Chuffart
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1455 4016229d Florent Chuffart
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1456 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1457 5bfac5a3 Florent Chuffart
        print("case 3")
1458 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1459 5bfac5a3 Florent Chuffart
        next
1460 5bfac5a3 Florent Chuffart
      }
1461 4016229d Florent Chuffart
      foormyjm = translate_curs(foorm, "YJM", config=config)
1462 4016229d Florent Chuffart
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1463 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1464 5bfac5a3 Florent Chuffart
        print("case 4")
1465 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1466 5bfac5a3 Florent Chuffart
        next
1467 5bfac5a3 Florent Chuffart
      }
1468 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1469 5bfac5a3 Florent Chuffart
      STOP = TRUE
1470 5bfac5a3 Florent Chuffart
    }
1471 5bfac5a3 Florent Chuffart
    STOP = FALSE
1472 5bfac5a3 Florent Chuffart
    nb_round = 0
1473 5bfac5a3 Florent Chuffart
    while(!STOP) {
1474 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1475 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1476 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1477 4016229d Florent Chuffart
      foobyrm = translate_curs(fooby, "RM", config=config)
1478 4016229d Florent Chuffart
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1479 4016229d Florent Chuffart
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1480 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1481 5bfac5a3 Florent Chuffart
        print("case 1")
1482 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1483 5bfac5a3 Florent Chuffart
      }
1484 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1485 4016229d Florent Chuffart
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1486 4016229d Florent Chuffart
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1487 4016229d Florent Chuffart
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1488 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1489 5bfac5a3 Florent Chuffart
        print("case 2")
1490 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1491 5bfac5a3 Florent Chuffart
        next
1492 5bfac5a3 Florent Chuffart
      }
1493 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1494 5bfac5a3 Florent Chuffart
      STOP = TRUE
1495 5bfac5a3 Florent Chuffart
    }
1496 5bfac5a3 Florent Chuffart
    print("end")
1497 5bfac5a3 Florent Chuffart
    return(foo)
1498 5bfac5a3 Florent Chuffart
  }
1499 1d833b97 Florent Chuffart
1500 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1501 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1502 5bfac5a3 Florent Chuffart
    # computing diffs
1503 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1504 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1505 1d833b97 Florent Chuffart
    # Filtering
1506 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1507 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1508 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1509 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1510 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1511 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1512 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1513 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1514 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1515 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1516 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1517 5bfac5a3 Florent Chuffart
  		} else {
1518 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1519 1d833b97 Florent Chuffart
  		}
1520 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1521 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1522 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1523 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1524 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1525 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1526 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1527 5bfac5a3 Florent Chuffart
          # print(check)
1528 5bfac5a3 Florent Chuffart
      		roi1 = roi
1529 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1530 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1531 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1532 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1533 5bfac5a3 Florent Chuffart
          }
1534 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1535 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1536 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1537 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1538 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1539 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1540 5bfac5a3 Florent Chuffart
          } else {
1541 5bfac5a3 Florent Chuffart
            check = NULL
1542 5bfac5a3 Florent Chuffart
            roi = NULL
1543 5bfac5a3 Florent Chuffart
          }
1544 5bfac5a3 Florent Chuffart
        }
1545 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1546 1d833b97 Florent Chuffart
  	  }
1547 5bfac5a3 Florent Chuffart
    })
1548 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1549 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1550 1d833b97 Florent Chuffart
  	return(rois)
1551 1d833b97 Florent Chuffart
  }
1552 1d833b97 Florent Chuffart
1553 4016229d Florent Chuffart
  translate_curs = function(rois, target_strain, config) {
1554 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1555 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1556 4016229d Florent Chuffart
      tr_roi = translate_cur(roi, target_strain, config=config)  
1557 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1558 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1559 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1560 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1561 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1562 5bfac5a3 Florent Chuffart
      return(tr_roi)
1563 5bfac5a3 Florent Chuffart
    })
1564 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1565 5bfac5a3 Florent Chuffart
    return(tr_rois)
1566 5bfac5a3 Florent Chuffart
  }
1567 729c934e Florent Chuffart
1568 5bfac5a3 Florent Chuffart
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1569 5bfac5a3 Florent Chuffart
  rois = list()
1570 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1571 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1572 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1573 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1574 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1575 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1576 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1577 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1578 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1579 4016229d Florent Chuffart
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1580 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1581 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1582 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1583 5bfac5a3 Florent Chuffart
  }
1584 5bfac5a3 Florent Chuffart
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1585 5bfac5a3 Florent Chuffart
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1586 4016229d Florent Chuffart
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1587 5bfac5a3 Florent Chuffart
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1588 5bfac5a3 Florent Chuffart
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1589 4016229d Florent Chuffart
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1590 5bfac5a3 Florent Chuffart
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1591 5bfac5a3 Florent Chuffart
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1592 5bfac5a3 Florent Chuffart
  return (squeezed_rois)
1593 1d833b97 Florent Chuffart
}
1594 1d833b97 Florent Chuffart
1595 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1596 5bfac5a3 Florent Chuffart
### This function...
1597 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1598 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1599 5bfac5a3 Florent Chuffart
) {
1600 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1601 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1602 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1603 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1604 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1605 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1606 5bfac5a3 Florent Chuffart
                                , ]
1607 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1608 5bfac5a3 Florent Chuffart
      print("removing a region")
1609 5bfac5a3 Florent Chuffart
      return(NULL)
1610 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1611 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1612 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1613 5bfac5a3 Florent Chuffart
    } else {
1614 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1615 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1616 5bfac5a3 Florent Chuffart
        print("not the same begin")
1617 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1618 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1619 5bfac5a3 Florent Chuffart
      }
1620 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1621 5bfac5a3 Florent Chuffart
        print("not the same end")
1622 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1623 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1624 5bfac5a3 Florent Chuffart
      }
1625 5bfac5a3 Florent Chuffart
      return(roi1)
1626 5bfac5a3 Florent Chuffart
    }         
1627 5bfac5a3 Florent Chuffart
  })
1628 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1629 5bfac5a3 Florent Chuffart
}
1630 1d833b97 Florent Chuffart
1631 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1632 e5603c3f Florent Chuffart
### This function loads in memory the data corresponding to the given experiments.
1633 e5603c3f Florent Chuffart
expe, ##<< a list of vectors corresponding to replicates.
1634 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1635 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1636 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1637 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1638 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1639 61914235 Florent Chuffart
) {
1640 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1641 61914235 Florent Chuffart
  	samples=list()
1642 61914235 Florent Chuffart
  	for (i in samples_ids) {
1643 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1644 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1645 4016229d Florent Chuffart
      sample$roi = translate_cur(roi, sample$strain, config = config)
1646 61914235 Florent Chuffart
  		if (get_genome) {
1647 61914235 Florent Chuffart
  			# Get Genome
1648 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1649 5bfac5a3 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1650 61914235 Florent Chuffart
  		}
1651 61914235 Florent Chuffart
  		# Get inputs
1652 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1653 55c1cdff Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1654 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
1655 61914235 Florent Chuffart
  		if (!only_fetch) {
1656 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1657 61914235 Florent Chuffart
  	  }
1658 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1659 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1660 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1661 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1662 61914235 Florent Chuffart
  			if (!only_fetch) {
1663 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1664 61914235 Florent Chuffart
    		}
1665 61914235 Florent Chuffart
  		}
1666 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
1667 61914235 Florent Chuffart
  	}
1668 61914235 Florent Chuffart
  	return(samples)
1669 61914235 Florent Chuffart
  }
1670 61914235 Florent Chuffart
	replicates = list()
1671 61914235 Florent Chuffart
	for(samples_ids in expe) {
1672 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1673 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1674 61914235 Florent Chuffart
	}
1675 61914235 Florent Chuffart
	return(replicates)
1676 61914235 Florent Chuffart
  }, ex = function() {
1677 61914235 Florent Chuffart
    # library(rjson)
1678 61914235 Florent Chuffart
    # library(nucleominer)
1679 55c1cdff Florent Chuffart
    #
1680 61914235 Florent Chuffart
    # # Read config file
1681 e5603c3f Florent Chuffart
    # json_conf_file = "nucleominer_config.json"
1682 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1683 61914235 Florent Chuffart
    # # Read sample file
1684 55c1cdff Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1685 61914235 Florent Chuffart
    # # here are the sample ids in a list
1686 61914235 Florent Chuffart
    # expes = list(c(1))
1687 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1688 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1689 61914235 Florent Chuffart
    # # it displays the corverage
1690 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1691 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
1692 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
1693 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
1694 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
1695 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
1696 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
1697 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
1698 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
1699 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
1700 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
1701 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
1702 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
1703 61914235 Florent Chuffart
    #       height = 50)
1704 61914235 Florent Chuffart
  })
1705 61914235 Florent Chuffart
1706 61914235 Florent Chuffart
1707 61914235 Florent Chuffart
1708 61914235 Florent Chuffart
1709 61914235 Florent Chuffart
1710 61914235 Florent Chuffart
1711 61914235 Florent Chuffart
1712 61914235 Florent Chuffart
1713 61914235 Florent Chuffart
1714 61914235 Florent Chuffart
1715 61914235 Florent Chuffart
1716 61914235 Florent Chuffart
1717 61914235 Florent Chuffart
1718 61914235 Florent Chuffart
1719 61914235 Florent Chuffart
1720 61914235 Florent Chuffart
1721 61914235 Florent Chuffart
1722 61914235 Florent Chuffart
1723 61914235 Florent Chuffart
1724 61914235 Florent Chuffart
1725 61914235 Florent Chuffart
1726 61914235 Florent Chuffart
1727 61914235 Florent Chuffart
1728 61914235 Florent Chuffart
1729 61914235 Florent Chuffart
1730 61914235 Florent Chuffart
1731 61914235 Florent Chuffart
1732 61914235 Florent Chuffart
1733 61914235 Florent Chuffart
1734 61914235 Florent Chuffart
1735 61914235 Florent Chuffart
1736 61914235 Florent Chuffart
1737 61914235 Florent Chuffart
1738 1d833b97 Florent Chuffart
1739 1d833b97 Florent Chuffart
1740 1d833b97 Florent Chuffart
1741 1d833b97 Florent Chuffart
1742 1d833b97 Florent Chuffart
1743 1d833b97 Florent Chuffart
1744 1d833b97 Florent Chuffart
1745 1d833b97 Florent Chuffart
1746 1d833b97 Florent Chuffart
1747 1d833b97 Florent Chuffart
1748 1d833b97 Florent Chuffart
1749 1d833b97 Florent Chuffart
1750 1d833b97 Florent Chuffart
1751 1d833b97 Florent Chuffart
1752 1d833b97 Florent Chuffart
1753 1d833b97 Florent Chuffart
1754 1d833b97 Florent Chuffart
1755 1d833b97 Florent Chuffart
1756 1d833b97 Florent Chuffart
1757 1d833b97 Florent Chuffart
1758 1d833b97 Florent Chuffart
1759 1d833b97 Florent Chuffart
1760 1d833b97 Florent Chuffart
1761 1d833b97 Florent Chuffart
1762 1d833b97 Florent Chuffart
1763 1d833b97 Florent Chuffart
1764 1d833b97 Florent Chuffart
1765 1d833b97 Florent Chuffart
1766 1d833b97 Florent Chuffart
1767 1d833b97 Florent Chuffart
1768 1d833b97 Florent Chuffart
1769 1d833b97 Florent Chuffart
1770 1d833b97 Florent Chuffart
1771 1d833b97 Florent Chuffart
1772 6b5a528c Florent Chuffart
1773 6b5a528c Florent Chuffart
1774 6b5a528c Florent Chuffart
1775 6b5a528c Florent Chuffart
1776 6b5a528c Florent Chuffart
1777 6b5a528c Florent Chuffart
1778 6b5a528c Florent Chuffart
1779 6b5a528c Florent Chuffart
1780 6b5a528c Florent Chuffart
1781 6b5a528c Florent Chuffart
1782 6b5a528c Florent Chuffart
1783 6b5a528c Florent Chuffart
1784 6b5a528c Florent Chuffart
1785 6b5a528c Florent Chuffart
1786 6b5a528c Florent Chuffart
1787 6b5a528c Florent Chuffart
1788 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
1789 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
1790 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
1791 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
1792 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1793 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1794 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1795 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1796 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1797 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1798 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1799 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1800 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
1801 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1802 3bfae55d Florent Chuffart
plot_fuzzy_nucs = FALSE,  ##<< Plot (or not) cluster of fuzzy
1803 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1804 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1805 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1806 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1807 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1808 6e0010bc Florent Chuffart
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1809 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1810 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1811 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1812 6e0010bc Florent Chuffart
main=NULL, ##<< main title of the produced plot
1813 6e0010bc Florent Chuffart
xlab=NULL, ##<< xlab of the produced plot
1814 6e0010bc Florent Chuffart
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1815 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1816 6b5a528c Florent Chuffart
){
1817 6b5a528c Florent Chuffart
  returned_list = list()
1818 6b5a528c Florent Chuffart
  # Computing global display parameters
1819 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1820 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1821 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1822 6b5a528c Florent Chuffart
  } else {
1823 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1824 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1825 6b5a528c Florent Chuffart
  }
1826 6b5a528c Florent Chuffart
	base_glo = 0
1827 6b5a528c Florent Chuffart
	nb_rank_glo = 0
1828 6b5a528c Florent Chuffart
  for (samples in replicates) {
1829 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
1830 6b5a528c Florent Chuffart
  }
1831 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1832 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
1833 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
1834 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
1835 6b5a528c Florent Chuffart
  # Plot main frame
1836 6e0010bc Florent Chuffart
  if (is.null(xlab)) {
1837 6e0010bc Florent Chuffart
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1838 6e0010bc Florent Chuffart
  }
1839 6e0010bc Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1840 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1841 6b5a528c Florent Chuffart
  # Go
1842 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
1843 9fdbfada Florent Chuffart
  wp_maps = list()
1844 9fdbfada Florent Chuffart
  fuzzy_maps = list()
1845 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
1846 6b5a528c Florent Chuffart
		# Computing replicate parameters
1847 6b5a528c Florent Chuffart
		nb_rank = length(samples)
1848 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
1849 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
1850 6b5a528c Florent Chuffart
		y_min = min(ylim)
1851 6b5a528c Florent Chuffart
		y_max = max(ylim)
1852 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
1853 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
1854 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
1855 6b5a528c Florent Chuffart
			# computing sample parameters
1856 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
1857 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1858 6e0010bc Florent Chuffart
      if (plot_sample_id) {
1859 6e0010bc Florent Chuffart
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1860 6e0010bc Florent Chuffart
      }
1861 6b5a528c Florent Chuffart
1862 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1863 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
1864 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
1865 6b5a528c Florent Chuffart
		  } else {
1866 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
1867 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
1868 5bfac5a3 Florent Chuffart
		  }
1869 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
1870 6b5a528c Florent Chuffart
	    # Plot Genome seq
1871 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
1872 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1873 6b5a528c Florent Chuffart
		  }
1874 6b5a528c Florent Chuffart
			# Plot reads
1875 6b5a528c Florent Chuffart
			reads = sample$inputs
1876 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
1877 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
1878 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1879 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
1880 6b5a528c Florent Chuffart
			}
1881 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
1882 6b5a528c Florent Chuffart
        # require(plotrix)
1883 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1884 6b5a528c Florent Chuffart
			}
1885 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
1886 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
1887 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
1888 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
1889 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1890 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
1891 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
1892 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
1893 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1894 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1895 a52f85d7 Florent Chuffart
          }
1896 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
1897 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
1898 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
1899 a52f85d7 Florent Chuffart
          tmp_current_level = 0
1900 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
1901 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
1902 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
1903 a52f85d7 Florent Chuffart
          }
1904 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
1905 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
1906 a52f85d7 Florent Chuffart
        } else {
1907 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
1908 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
1909 6b5a528c Florent Chuffart
        }
1910 0d1475ac Florent Chuffart
        # print(steps_x)
1911 0d1475ac Florent Chuffart
        # print(steps_y)
1912 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1913 6b5a528c Florent Chuffart
        abline(y_lev,0)
1914 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1915 6b5a528c Florent Chuffart
			}
1916 6b5a528c Florent Chuffart
			# Plot nucs
1917 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1918 6b5a528c Florent Chuffart
				nucs = sample$outputs
1919 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
1920 6b5a528c Florent Chuffart
					col = 1
1921 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
1922 7646593d Florent Chuffart
            if (change_col) {
1923 55c1cdff Florent Chuffart
  						col = col + 1
1924 6e0010bc Florent Chuffart
              } else {
1925 6e0010bc Florent Chuffart
                col = "blue"
1926 6e0010bc Florent Chuffart
              }
1927 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
1928 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1929 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1930 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
1931 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
1932 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1933 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
1934 6b5a528c Florent Chuffart
						}
1935 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1936 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1937 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1938 6b5a528c Florent Chuffart
		  			}
1939 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
1940 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
1941 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1942 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1943 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1944 6b5a528c Florent Chuffart
	          }
1945 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
1946 6e0010bc Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1947 6b5a528c Florent Chuffart
	          }
1948 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
1949 6b5a528c Florent Chuffart
				      # require(plotrix)
1950 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1951 6b5a528c Florent Chuffart
						}
1952 6b5a528c Florent Chuffart
		      }
1953 6b5a528c Florent Chuffart
		    } else {
1954 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
1955 6b5a528c Florent Chuffart
		    }
1956 6b5a528c Florent Chuffart
			}
1957 6b5a528c Florent Chuffart
	  }
1958 55c1cdff Florent Chuffart
1959 6b5a528c Florent Chuffart
	  # Plot wp nucs
1960 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1961 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
1962 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
1963 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1964 6b5a528c Florent Chuffart
				} else {
1965 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1966 6b5a528c Florent Chuffart
				}
1967 6b5a528c Florent Chuffart
		  } else {
1968 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
1969 6b5a528c Florent Chuffart
		  }
1970 729c934e Florent Chuffart
      if (plot_chain) {
1971 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
1972 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
1973 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
1974 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
1975 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
1976 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
1977 729c934e Florent Chuffart
            # print(tmp_nuc)
1978 729c934e Florent Chuffart
            return(tmp_nuc)
1979 55c1cdff Florent Chuffart
          })
1980 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
1981 dc58ee6c Florent Chuffart
        })
1982 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
1983 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
1984 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
1985 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
1986 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
1987 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
1988 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
1989 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
1990 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
1991 6e0010bc Florent Chuffart
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
1992 e5603c3f Florent Chuffart
        if (is.null(config$TRACK_LLR_OFFSET)) {
1993 e5603c3f Florent Chuffart
          config$TRACK_LLR_OFFSET = 0
1994 6e0010bc Florent Chuffart
        }
1995 e5603c3f Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
1996 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
1997 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
1998 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
1999 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2000 7e2d37e1 Florent Chuffart
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
2001 55c1cdff Florent Chuffart
        new_tmp_x = c()
2002 55c1cdff Florent Chuffart
        new_tmp_y = c()
2003 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2004 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
2005 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
2006 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2007 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2008 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2009 ec2936ea Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lwd=2)
2010 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2011 ec2936ea Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lwd=2)
2012 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2013 e5603c3f Florent Chuffart
        if (is.null(config$LEGEND_LLR_POS)) {
2014 6e0010bc Florent Chuffart
          pos = 2
2015 6e0010bc Florent Chuffart
        } else {
2016 e5603c3f Florent Chuffart
          pos = config$LEGEND_LLR_POS
2017 6e0010bc Florent Chuffart
        }
2018 7e2d37e1 Florent Chuffart
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2019 7e2d37e1 Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
2020 37b11e9b Florent Chuffart
        
2021 729c934e Florent Chuffart
      }
2022 55c1cdff Florent Chuffart
2023 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2024 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2025 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2026 cc54d805 Florent Chuffart
        nb_tracks = length(replicates[[1]])
2027 cc54d805 Florent Chuffart
        # print(paste("**************"), nb_tracks)
2028 d973538c Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
2029 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2030 9fdbfada Florent Chuffart
2031 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2032 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2033 4016229d Florent Chuffart
          if (!is.null(fuzzy_map)) {
2034 4016229d Florent Chuffart
            if (nrow(fuzzy_map) > 0) {
2035 4016229d Florent Chuffart
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2036 4016229d Florent Chuffart
            }
2037 d492bca4 Florent Chuffart
          }
2038 9fdbfada Florent Chuffart
        } 
2039 9fdbfada Florent Chuffart
2040 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2041 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2042 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2043 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2044 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2045 9fdbfada Florent Chuffart
      					all_original_reads = c()
2046 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2047 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2048 9fdbfada Florent Chuffart
      					}
2049 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2050 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2051 9fdbfada Florent Chuffart
    				  }
2052 729c934e Florent Chuffart
  				  }
2053 729c934e Florent Chuffart
  				}
2054 729c934e Florent Chuffart
  			}
2055 729c934e Florent Chuffart
      }
2056 6b5a528c Florent Chuffart
		}
2057 5bfac5a3 Florent Chuffart
	}
2058 55c1cdff Florent Chuffart
2059 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2060 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2061 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2062 b69443b5 Florent Chuffart
      if (!is.null(aligned_inter_strain_nucs)) {
2063 4016229d Florent Chuffart
        aligned_inter_strain_nucs$cur_index = "foo" 
2064 b69443b5 Florent Chuffart
      }
2065 9fdbfada Florent Chuffart
    }
2066 55c1cdff Florent Chuffart
2067 9fdbfada Florent Chuffart
    #Plot common wp nucs
2068 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2069 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2070 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2071 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2072 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2073 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2074 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2075 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2076 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2077 9fdbfada Florent Chuffart
2078 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2079 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2080 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2081 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2082 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2083 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2084 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2085 9fdbfada Florent Chuffart
        } else {
2086 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2087 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2088 6b5a528c Florent Chuffart
        }
2089 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2090 55c1cdff Florent Chuffart
      }
2091 9fdbfada Florent Chuffart
    }
2092 b69443b5 Florent Chuffart
    print(aligned_inter_strain_nucs)
2093 b69443b5 Florent Chuffart
    if (!is.null(aligned_inter_strain_nucs)) {
2094 b69443b5 Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2095 b69443b5 Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2096 b69443b5 Florent Chuffart
        tmp_xs = tmp_ys = c()
2097 b69443b5 Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2098 b69443b5 Florent Chuffart
          samples = replicates[[replicate_rank]]
2099 b69443b5 Florent Chuffart
          strain = samples[[1]]$strain
2100 b69443b5 Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2101 b69443b5 Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2102 b69443b5 Florent Chuffart
        }
2103 37b11e9b Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2104 55c1cdff Florent Chuffart
      }
2105 9fdbfada Florent Chuffart
    }
2106 9fdbfada Florent Chuffart
    
2107 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2108 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2109 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2110 4016229d Florent Chuffart
      cur_index = "foo"
2111 9fdbfada Florent Chuffart
      common_nuc_results = list()
2112 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2113 4016229d Florent Chuffart
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2114 ec2936ea Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))        
2115 9fdbfada Florent Chuffart
    }
2116 9fdbfada Florent Chuffart
2117 6b5a528c Florent Chuffart
	}
2118 6b5a528c Florent Chuffart
  return(returned_list)
2119 6b5a528c Florent Chuffart
}
2120 6b5a528c Florent Chuffart
2121 6b5a528c Florent Chuffart
2122 6b5a528c Florent Chuffart
2123 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2124 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2125 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2126 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2127 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2128 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2129 9fdbfada Florent Chuffart
) {
2130 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2131 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2132 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2133 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2134 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2135 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2136 9fdbfada Florent Chuffart
    }
2137 9fdbfada Florent Chuffart
  }
2138 9fdbfada Florent Chuffart
  return(fuzzy_map)
2139 9fdbfada Florent Chuffart
}
2140 9fdbfada Florent Chuffart
2141 9fdbfada Florent Chuffart
2142 9fdbfada Florent Chuffart
2143 9fdbfada Florent Chuffart
2144 9fdbfada Florent Chuffart
2145 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2146 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2147 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2148 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2149 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
2150 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2151 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2152 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2153 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2154 9fdbfada Florent Chuffart
) {
2155 4016229d Florent Chuffart
  # print(cur_index)
2156 9fdbfada Florent Chuffart
2157 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2158 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2159 4016229d Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2160 9fdbfada Florent Chuffart
2161 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2162 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2163 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2164 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2165 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2166 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2167 9fdbfada Florent Chuffart
  # Let's go!
2168 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2169 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2170 9fdbfada Florent Chuffart
      return(NULL)
2171 9fdbfada Florent Chuffart
    } else {
2172 9fdbfada Florent Chuffart
      return (index_nuc)
2173 9fdbfada Florent Chuffart
    }
2174 9fdbfada Florent Chuffart
  }))
2175 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2176 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2177 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2178 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2179 9fdbfada Florent Chuffart
  }
2180 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2181 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2182 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2183 9fdbfada Florent Chuffart
2184 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2185 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2186 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2187 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2188 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2189 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2190 9fdbfada Florent Chuffart
  # Let's go!
2191 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2192 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2193 9fdbfada Florent Chuffart
      return(NULL)
2194 9fdbfada Florent Chuffart
    } else {
2195 9fdbfada Florent Chuffart
      return (index_nuc)
2196 9fdbfada Florent Chuffart
    }
2197 9fdbfada Florent Chuffart
  }))
2198 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2199 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2200 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2201 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2202 9fdbfada Florent Chuffart
  }
2203 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2204 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2205 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2206 4016229d Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2207 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2208 9fdbfada Florent Chuffart
2209 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2210 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2211 9fdbfada Florent Chuffart
2212 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2213 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2214 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2215 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2216 9fdbfada Florent Chuffart
  # Let's go!
2217 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2218 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2219 9fdbfada Florent Chuffart
      return (index_nuc)
2220 9fdbfada Florent Chuffart
    } else {
2221 9fdbfada Florent Chuffart
      return(NULL)
2222 9fdbfada Florent Chuffart
    }
2223 9fdbfada Florent Chuffart
  }))
2224 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2225 9fdbfada Florent Chuffart
  
2226 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2227 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2228 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2229 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2230 9fdbfada Florent Chuffart
  # Let's go!
2231 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2232 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2233 9fdbfada Florent Chuffart
      return (index_nuc)
2234 9fdbfada Florent Chuffart
    } else {
2235 9fdbfada Florent Chuffart
      return(NULL)
2236 9fdbfada Florent Chuffart
    }
2237 9fdbfada Florent Chuffart
  }))
2238 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2239 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2240 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2241 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2242 9fdbfada Florent Chuffart
  } else {
2243 4016229d Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2244 9fdbfada Florent Chuffart
  }
2245 9fdbfada Florent Chuffart
2246 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2247 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2248 9fdbfada Florent Chuffart
2249 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2250 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2251 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2252 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2253 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2254 37b11e9b Florent Chuffart
  min_unr_width = 75
2255 37b11e9b Florent Chuffart
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2256 9fdbfada Florent Chuffart
2257 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2258 9fdbfada Florent Chuffart
  return (non_inter_unr)
2259 9fdbfada Florent Chuffart
}
2260 9fdbfada Florent Chuffart
2261 9fdbfada Florent Chuffart