Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ dadb6a4d

Historique | Voir | Annoter | Télécharger (86,88 ko)

1 55c1cdff Florent Chuffart
# get_content = function(# Get content from cached
2 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
3 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
4 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
5 6b5a528c Florent Chuffart
# ) {
6 6b5a528c Florent Chuffart
#   UseMethod("get_content", obj)
7 55c1cdff Florent Chuffart
#   ### Returns the cached content of the current object.
8 6b5a528c Florent Chuffart
# }
9 55c1cdff Florent Chuffart
#
10 55c1cdff Florent Chuffart
# get_content.default = structure( function(# Get content from cached
11 6b5a528c Florent Chuffart
# ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed.
12 6b5a528c Florent Chuffart
# obj, ##<< The object that we want its content.
13 6b5a528c Florent Chuffart
# ... ##<< Parameters that will be passed to my_read
14 6b5a528c Florent Chuffart
# ) {
15 6b5a528c Florent Chuffart
#   if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) {
16 6b5a528c Florent Chuffart
#     NM_CACHE <<- list()
17 6b5a528c Florent Chuffart
#   }
18 6b5a528c Florent Chuffart
#   if (is.null(NM_CACHE[[obj$filename]])) {
19 6b5a528c Florent Chuffart
#     print(paste("Loading file ",obj$filename, sep=""))
20 6b5a528c Florent Chuffart
#     tmp_content = my_read(obj, ...)
21 6b5a528c Florent Chuffart
#     print("affect it...")
22 6b5a528c Florent Chuffart
#     NM_CACHE[[obj$filename]] <<- tmp_content
23 6b5a528c Florent Chuffart
#     print("done.")
24 55c1cdff Florent Chuffart
#   }
25 6b5a528c Florent Chuffart
#   return(NM_CACHE[[obj$filename]])
26 55c1cdff Florent Chuffart
# ### Returns the cached content of the current object.
27 6b5a528c Florent Chuffart
# }, ex=function(){
28 6b5a528c Florent Chuffart
#   # Create a dataframe
29 6b5a528c Florent Chuffart
#   df = NULL
30 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1", key2 = "value2"))
31 6b5a528c Florent Chuffart
#   df = dfadd(df, list(key1 = "value1'", key2 = "value3'"))
32 6b5a528c Florent Chuffart
#   # Dump it into tmp file
33 6b5a528c Florent Chuffart
#   write.table(df, file="/tmp/tmp.dump.table.tmp")
34 6b5a528c Florent Chuffart
#   # Load it into cache using feature.
35 6b5a528c Florent Chuffart
#   cached_content_object  = list(filename="/tmp/tmp.dump.table.tmp")
36 55c1cdff Florent Chuffart
#   class(cached_content_object) = "table"
37 6b5a528c Florent Chuffart
#   # First time it will be load into cache
38 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
39 6b5a528c Florent Chuffart
#   # Second time not
40 6b5a528c Florent Chuffart
#   print(get_content(cached_content_object))
41 55c1cdff Florent Chuffart
#
42 6b5a528c Florent Chuffart
# })
43 55c1cdff Florent Chuffart
#
44 6b5a528c Florent Chuffart
# my_read = function(
45 55c1cdff Florent Chuffart
# ### Abstract my_read function.
46 6b5a528c Florent Chuffart
#   obj, ...) {
47 6b5a528c Florent Chuffart
#   UseMethod("my_read", obj)
48 6b5a528c Florent Chuffart
# }
49 55c1cdff Florent Chuffart
#
50 6b5a528c Florent Chuffart
# my_read.default = function(
51 55c1cdff Florent Chuffart
# ### Default my_read function.
52 6b5a528c Florent Chuffart
#   obj, ...){
53 55c1cdff Florent Chuffart
#   stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep=""))
54 6b5a528c Florent Chuffart
# }
55 55c1cdff Florent Chuffart
#
56 6b5a528c Florent Chuffart
# my_read.fasta = function(
57 55c1cdff Florent Chuffart
# ### my_read function for fasta files.
58 6b5a528c Florent Chuffart
#   obj, ...){
59 6b5a528c Florent Chuffart
#   # require(seqinr)
60 6b5a528c Florent Chuffart
#   return(read.fasta(obj$filename, ...))
61 6b5a528c Florent Chuffart
# }
62 55c1cdff Florent Chuffart
#
63 6b5a528c Florent Chuffart
# my_read.table = function(
64 55c1cdff Florent Chuffart
# ### my_read function for table files.
65 6b5a528c Florent Chuffart
#   obj, ...){
66 6b5a528c Florent Chuffart
#   if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") {
67 55c1cdff Florent Chuffart
#     return(read.table(file=gzfile(obj$filename), ...))
68 55c1cdff Florent Chuffart
#   } else {
69 6b5a528c Florent Chuffart
#     return(read.table(file=obj$filename, ...))
70 6b5a528c Florent Chuffart
#   }
71 55c1cdff Florent Chuffart
# }
72 55c1cdff Florent Chuffart
#
73 6b5a528c Florent Chuffart
# my_read.cvs = function(
74 55c1cdff Florent Chuffart
# ### my_read function for cvs files.
75 6b5a528c Florent Chuffart
#   obj, ...){
76 6b5a528c Florent Chuffart
#   return(read.csv(file=obj$filename, ...))
77 55c1cdff Florent Chuffart
# }
78 6b5a528c Florent Chuffart
79 6b5a528c Florent Chuffart
80 6b5a528c Florent Chuffart
81 6b5a528c Florent Chuffart
82 6b5a528c Florent Chuffart
83 6b5a528c Florent Chuffart
84 6b5a528c Florent Chuffart
85 6b5a528c Florent Chuffart
86 6b5a528c Florent Chuffart
87 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
88 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
89 55c1cdff Florent Chuffart
##author<< Gael Yvert,
90 6b5a528c Florent Chuffart
(
91 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
92 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
93 6b5a528c Florent Chuffart
) {
94 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
95 6b5a528c Florent Chuffart
  N = length(x)
96 6b5a528c Florent Chuffart
  i = 1;
97 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
98 6b5a528c Florent Chuffart
  if (i == 1)
99 6b5a528c Florent Chuffart
    return (NA)
100 6b5a528c Florent Chuffart
  else
101 6b5a528c Florent Chuffart
    return( x[i-1] )
102 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
103 6b5a528c Florent Chuffart
}, ex=function(){
104 6b5a528c Florent Chuffart
  print("example")
105 6b5a528c Florent Chuffart
})
106 6b5a528c Florent Chuffart
107 6b5a528c Florent Chuffart
108 6b5a528c Florent Chuffart
109 7e2d37e1 Florent Chuffart
llr_score_nvecs = structure(function # Likelihood ratio
110 7e2d37e1 Florent Chuffart
### Compute the log likelihood ratio of two or more set of value.
111 6b5a528c Florent Chuffart
(
112 7e2d37e1 Florent Chuffart
  xs ##<< list of vectors.
113 6b5a528c Florent Chuffart
) {
114 7e2d37e1 Florent Chuffart
  l = length(xs)
115 7e2d37e1 Florent Chuffart
  if (l < 1) {
116 7e2d37e1 Florent Chuffart
    return(NA)
117 7e2d37e1 Florent Chuffart
  }
118 7e2d37e1 Florent Chuffart
  if (l == 1) {
119 7e2d37e1 Florent Chuffart
    return(1)
120 7e2d37e1 Florent Chuffart
  }
121 7e2d37e1 Florent Chuffart
  sumllX = 0
122 7e2d37e1 Florent Chuffart
  for (i in 1:l) {
123 7e2d37e1 Florent Chuffart
    x = xs[[i]]
124 7e2d37e1 Florent Chuffart
  	if (length(x) <= 1) {
125 7e2d37e1 Florent Chuffart
  		return(NA)
126 7e2d37e1 Florent Chuffart
  	}
127 7e2d37e1 Florent Chuffart
    meanX = mean(x)
128 7e2d37e1 Florent Chuffart
    sdX = sd(x)
129 7e2d37e1 Florent Chuffart
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
130 7e2d37e1 Florent Chuffart
    sumllX = sumllX + llX
131 7e2d37e1 Florent Chuffart
  }
132 7e2d37e1 Florent Chuffart
  meanXglo = mean(unlist(xs))
133 7e2d37e1 Florent Chuffart
  sdXglo = sd(unlist(xs))
134 7e2d37e1 Florent Chuffart
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
135 7e2d37e1 Florent Chuffart
  ratio = sumllX - llXYZ
136 6b5a528c Florent Chuffart
  return(ratio)
137 7e2d37e1 Florent Chuffart
### Returns the log likelihood ratio.
138 6b5a528c Florent Chuffart
}, ex=function(){
139 6b5a528c Florent Chuffart
  # LOD score for 2 set of values
140 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
141 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
142 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
143 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
144 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
145 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
146 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
147 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
148 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
149 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
150 7e2d37e1 Florent Chuffart
  llr_score_nvecs(list(x1,x2))
151 6b5a528c Florent Chuffart
 })
152 6b5a528c Florent Chuffart
153 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
154 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
155 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
156 6b5a528c Florent Chuffart
		l ##<<  A list
157 6b5a528c Florent Chuffart
	) {
158 6b5a528c Florent Chuffart
  if (is.null(df)) {
159 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
160 6b5a528c Florent Chuffart
  } else {
161 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
162 6b5a528c Florent Chuffart
  }
163 6b5a528c Florent Chuffart
  return(df)
164 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
165 6b5a528c Florent Chuffart
}, ex=function(){
166 6b5a528c Florent Chuffart
		## Here dataframe is NULL
167 6b5a528c Florent Chuffart
		print(df)
168 6b5a528c Florent Chuffart
		df = NULL
169 5bfac5a3 Florent Chuffart
170 6b5a528c Florent Chuffart
		# Initialize df
171 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
172 6b5a528c Florent Chuffart
		print(df)
173 5bfac5a3 Florent Chuffart
174 6b5a528c Florent Chuffart
		# Adding elements to df
175 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
176 6b5a528c Florent Chuffart
		print(df)
177 6b5a528c Florent Chuffart
})
178 6b5a528c Florent Chuffart
179 6b5a528c Florent Chuffart
180 6b5a528c Florent Chuffart
sign_from_strand = function(
181 55c1cdff Florent Chuffart
### Get the sign of strand
182 6b5a528c Florent Chuffart
strands) {
183 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
184 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
185 6b5a528c Florent Chuffart
}
186 6b5a528c Florent Chuffart
187 6b5a528c Florent Chuffart
flat_reads = function(
188 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
189 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
190 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
191 6b5a528c Florent Chuffart
) {
192 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
193 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
194 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
195 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
196 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
197 6b5a528c Florent Chuffart
}
198 6b5a528c Florent Chuffart
199 6b5a528c Florent Chuffart
200 6b5a528c Florent Chuffart
201 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
202 ec2936ea Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold.
203 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
204 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
205 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
206 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
207 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
208 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
209 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
210 6b5a528c Florent Chuffart
) {
211 6b5a528c Florent Chuffart
  if (x_min < 0) {
212 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
213 6b5a528c Florent Chuffart
	} else {
214 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
215 6b5a528c Florent Chuffart
  }
216 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
217 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
218 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
219 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
220 6b5a528c Florent Chuffart
  i = 1
221 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
222 7e2d37e1 Florent Chuffart
    lb = tf_outputs[i,]$lower_bound
223 7e2d37e1 Florent Chuffart
    ub = tf_outputs[i,]$upper_bound
224 7e2d37e1 Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
225 6b5a528c Florent Chuffart
    i = i+1
226 6b5a528c Florent Chuffart
  }
227 6b5a528c Florent Chuffart
  return(tf_outputs)
228 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
229 6b5a528c Florent Chuffart
}
230 6b5a528c Florent Chuffart
231 6b5a528c Florent Chuffart
232 6b5a528c Florent Chuffart
233 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
234 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
235 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
236 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
237 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
238 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
239 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
240 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
241 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
242 b8a95426 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
243 6b5a528c Florent Chuffart
) {
244 6b5a528c Florent Chuffart
	if (only_f) {
245 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
246 6b5a528c Florent Chuffart
	} else if (only_r) {
247 5bfac5a3 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
248 6b5a528c Florent Chuffart
	} else {
249 6b5a528c Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
250 6b5a528c Florent Chuffart
	}
251 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
252 b8a95426 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
253 55c1cdff Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
254 b8a95426 Florent Chuffart
  }
255 6b5a528c Florent Chuffart
	return(inputs)
256 6b5a528c Florent Chuffart
### Returns filtred inputs.
257 6b5a528c Florent Chuffart
}
258 6b5a528c Florent Chuffart
259 6b5a528c Florent Chuffart
260 6b5a528c Florent Chuffart
261 6b5a528c Florent Chuffart
get_comp_strand = function(
262 6b5a528c Florent Chuffart
### Compute the complementatry strand.
263 6b5a528c Florent Chuffart
strand ##<< The original strand.
264 6b5a528c Florent Chuffart
) {
265 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
266 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
267 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
268 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
269 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
270 6b5a528c Florent Chuffart
	})
271 6b5a528c Florent Chuffart
### Returns the complementatry strand.
272 6b5a528c Florent Chuffart
}
273 6b5a528c Florent Chuffart
274 6b5a528c Florent Chuffart
275 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
276 7e2d37e1 Florent Chuffart
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
277 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
278 7e2d37e1 Florent Chuffart
llr_thres=20, ##<< Log likelihood ration threshold.
279 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
280 6b5a528c Florent Chuffart
){
281 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
282 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
283 6b5a528c Florent Chuffart
			return(TRUE)
284 6b5a528c Florent Chuffart
		}
285 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
286 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
287 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
288 6b5a528c Florent Chuffart
	        return(FALSE)
289 6b5a528c Florent Chuffart
	      }
290 6b5a528c Florent Chuffart
	  	}
291 6b5a528c Florent Chuffart
	  }
292 6b5a528c Florent Chuffart
	  return(TRUE)
293 6b5a528c Florent Chuffart
	}
294 6b5a528c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
295 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
296 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
297 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
298 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
299 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
300 6b5a528c Florent Chuffart
				# print(new_cluster)
301 6b5a528c Florent Chuffart
		  }
302 6b5a528c Florent Chuffart
		} else {
303 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
304 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
305 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
306 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
307 6b5a528c Florent Chuffart
			}
308 6b5a528c Florent Chuffart
		}
309 6b5a528c Florent Chuffart
		return(clusters)
310 6b5a528c Florent Chuffart
	}
311 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
312 7e2d37e1 Florent Chuffart
	llr_scores = c()
313 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
314 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
315 6b5a528c Florent Chuffart
  # compute clusters
316 6b5a528c Florent Chuffart
  clusters = list()
317 6b5a528c Florent Chuffart
  cluster_contents = list()
318 6b5a528c Florent Chuffart
  # Init reader
319 6b5a528c Florent Chuffart
  indexes = c()
320 6b5a528c Florent Chuffart
  track_readers = c()
321 6b5a528c Florent Chuffart
  current_nuc = NULL
322 7e2d37e1 Florent Chuffart
	llr_score = llr_thres + 1
323 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
324 6b5a528c Florent Chuffart
  tf_outs = list()
325 6b5a528c Florent Chuffart
	i = 1
326 6b5a528c Florent Chuffart
  for (sample in samples) {
327 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
328 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
329 6b5a528c Florent Chuffart
    indexes[i] = 1
330 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
331 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
332 6b5a528c Florent Chuffart
	  } else {
333 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
334 6b5a528c Florent Chuffart
		}
335 5bfac5a3 Florent Chuffart
		i = i+1
336 6b5a528c Florent Chuffart
  }
337 6b5a528c Florent Chuffart
	# print(track_readers)
338 6b5a528c Florent Chuffart
  new_cluster = NULL
339 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
340 6b5a528c Florent Chuffart
  nuc_from_track = c()
341 6b5a528c Florent Chuffart
  for (i in 1:length(tf_outs)){
342 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
343 6b5a528c Florent Chuffart
  }
344 6b5a528c Florent Chuffart
  # Start clustering
345 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
346 6b5a528c Florent Chuffart
    new_center = min(track_readers)
347 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
348 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
349 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
350 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
351 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
352 6b5a528c Florent Chuffart
		new_nuc$track = current_track
353 5bfac5a3 Florent Chuffart
354 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
355 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
356 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
357 6b5a528c Florent Chuffart
358 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
359 6b5a528c Florent Chuffart
360 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
361 7e2d37e1 Florent Chuffart
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
362 7e2d37e1 Florent Chuffart
			llr_scores = c(llr_scores,llr_score)
363 6b5a528c Florent Chuffart
		}
364 7e2d37e1 Florent Chuffart
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
365 7e2d37e1 Florent Chuffart
		if (is.na(llr_score)) {
366 7e2d37e1 Florent Chuffart
			llr_score = llr_thres + 1
367 6b5a528c Florent Chuffart
		}
368 7e2d37e1 Florent Chuffart
		# Store llr_score
369 7e2d37e1 Florent Chuffart
		new_nuc$llr_score = llr_score
370 7e2d37e1 Florent Chuffart
	  if (llr_score < llr_thres) {
371 6b5a528c Florent Chuffart
      # aggregate to current cluster
372 6b5a528c Florent Chuffart
      #   update bound
373 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
374 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
375 6b5a528c Florent Chuffart
      }
376 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
377 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
378 6b5a528c Florent Chuffart
      }
379 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
380 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
381 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
382 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
383 6b5a528c Florent Chuffart
    } else {
384 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
385 6b5a528c Florent Chuffart
        # store old cluster
386 6b5a528c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
387 6b5a528c Florent Chuffart
			}
388 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
389 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
390 6b5a528c Florent Chuffart
      nuc_from_track = c()
391 6b5a528c Florent Chuffart
      for (i in 1:length(tf_outs)){
392 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
393 6b5a528c Florent Chuffart
      }
394 55c1cdff Florent Chuffart
      # create new cluster
395 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
396 6b5a528c Florent Chuffart
      # update upper bound
397 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
398 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
399 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
400 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
401 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
402 5bfac5a3 Florent Chuffart
403 6b5a528c Florent Chuffart
		}
404 5bfac5a3 Florent Chuffart
405 6b5a528c Florent Chuffart
		current_nuc = new_nuc
406 6b5a528c Florent Chuffart
407 6b5a528c Florent Chuffart
    # update indexes
408 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
409 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
410 6b5a528c Florent Chuffart
      # update track
411 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
412 6b5a528c Florent Chuffart
    } else {
413 6b5a528c Florent Chuffart
      # update track
414 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
415 6b5a528c Florent Chuffart
    }
416 6b5a528c Florent Chuffart
  }
417 6b5a528c Florent Chuffart
  # store last cluster
418 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
419 6b5a528c Florent Chuffart
    # store old cluster
420 6b5a528c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
421 6b5a528c Florent Chuffart
  }
422 7e2d37e1 Florent Chuffart
	return(list(clusters, llr_scores))
423 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
424 5bfac5a3 Florent Chuffart
}, ex=function(){
425 6b5a528c Florent Chuffart
	# Dealing with a region of interest
426 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
427 6b5a528c Florent Chuffart
	samples = list()
428 6b5a528c Florent Chuffart
	for (i in 1:3) {
429 6b5a528c Florent Chuffart
		# Create TF output
430 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
431 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
432 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
433 6b5a528c Florent Chuffart
		# Generate corresponding reads
434 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
435 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
436 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
437 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
438 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
439 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
440 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
441 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
442 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
443 55c1cdff Florent Chuffart
														 "V3" = strands,
444 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
445 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
446 6b5a528c Florent Chuffart
	}
447 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
448 6b5a528c Florent Chuffart
})
449 6b5a528c Florent Chuffart
450 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
451 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
452 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
453 4016229d Florent Chuffart
cur_index ##<< the index of the roi involved
454 7646593d Florent Chuffart
) {
455 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
456 4016229d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
457 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
458 7646593d Florent Chuffart
	} else {
459 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
460 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
461 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
462 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
463 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
464 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
465 4016229d Florent Chuffart
			tmp_nuc_as_list[["cur_index"]] = cur_index
466 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
467 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
468 7646593d Florent Chuffart
			all_original_reads = c()
469 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
470 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
471 7646593d Florent Chuffart
			}
472 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
473 0d5fbf02 Florent Chuffart
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
474 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
475 7e2d37e1 Florent Chuffart
				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
476 7e2d37e1 Florent Chuffart
				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
477 7646593d Florent Chuffart
			} else {
478 7e2d37e1 Florent Chuffart
				tmp_nuc_as_list[["llr_1"]] = NA
479 7e2d37e1 Florent Chuffart
				tmp_nuc_as_list[["llr_2"]] = NA
480 7646593d Florent Chuffart
			}
481 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
482 7646593d Florent Chuffart
    })
483 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
484 7646593d Florent Chuffart
	}
485 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
486 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
487 7646593d Florent Chuffart
}
488 6b5a528c Florent Chuffart
489 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
490 6b5a528c Florent Chuffart
### This function aligns nucs between two strains for a given genome region.
491 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
492 6b5a528c Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
493 6b5a528c Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
494 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
495 7e2d37e1 Florent Chuffart
llr_thres=100, ##<< LOD cut off.
496 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
497 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
498 6b5a528c Florent Chuffart
) {
499 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
500 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
501 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
502 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
503 6b5a528c Florent Chuffart
	}
504 6b5a528c Florent Chuffart
	common_nuc = NULL
505 7e2d37e1 Florent Chuffart
	llr_scores = c()
506 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
507 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
508 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
509 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
510 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
511 4016229d Florent Chuffart
	big_cur = replicates[[1]][[1]]$roi
512 4016229d Florent Chuffart
  orig_big_cur = replicates[[1]][[1]]$orig_roi
513 4016229d Florent Chuffart
	if(big_cur$end - big_cur$begin < 0) {
514 4016229d Florent Chuffart
		tmp_begin = big_cur$begin
515 4016229d Florent Chuffart
		big_cur$begin =  big_cur$end
516 4016229d Florent Chuffart
		big_cur$end =  tmp_begin
517 6b5a528c Florent Chuffart
	}
518 6b5a528c Florent Chuffart
	# GO!
519 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
520 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
521 6b5a528c Florent Chuffart
	}
522 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
523 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
524 6b5a528c Florent Chuffart
  }
525 6b5a528c Florent Chuffart
	lws = c()
526 6b5a528c Florent Chuffart
	ups = c()
527 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
528 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
529 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
530 6b5a528c Florent Chuffart
	}
531 6b5a528c Florent Chuffart
532 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
533 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
534 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
535 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
536 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
537 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
538 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
539 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
540 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
541 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
542 4016229d Florent Chuffart
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
543 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
544 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
545 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
546 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
547 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
548 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
549 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
550 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
551 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
552 4016229d Florent Chuffart
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
553 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
554 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
555 6b5a528c Florent Chuffart
							SKIP = FALSE
556 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
557 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
558 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
559 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
560 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
561 6b5a528c Florent Chuffart
									SKIP = TRUE
562 6b5a528c Florent Chuffart
								}
563 6b5a528c Florent Chuffart
							}
564 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
565 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
566 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
567 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
568 6b5a528c Florent Chuffart
									SKIP = TRUE
569 6b5a528c Florent Chuffart
								}
570 6b5a528c Florent Chuffart
							}
571 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
572 6b5a528c Florent Chuffart
							if (!SKIP) {
573 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
574 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
575 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
576 7e2d37e1 Florent Chuffart
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
577 7e2d37e1 Florent Chuffart
								llr_scores = c(llr_scores, llr_score)
578 5bfac5a3 Florent Chuffart
								# Filtering on LOD Score
579 67157457 Florent Chuffart
                if (llr_score < llr_thres) {
580 6b5a528c Florent Chuffart
									tmp_nuc = list()
581 6b5a528c Florent Chuffart
									# strain_ref1
582 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
583 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
584 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
585 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
586 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
587 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
588 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
589 6b5a528c Florent Chuffart
									# strain_ref2
590 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
591 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
592 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
593 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
594 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
595 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
596 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
597 6b5a528c Florent Chuffart
									# common
598 7e2d37e1 Florent Chuffart
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
599 dadb6a4d Florent Chuffart
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
600 dadb6a4d Florent Chuffart
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
601 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
602 ec2936ea Florent Chuffart
                }
603 6b5a528c Florent Chuffart
							}
604 6b5a528c Florent Chuffart
						}
605 5bfac5a3 Florent Chuffart
					}
606 6b5a528c Florent Chuffart
				} else {
607 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
608 6b5a528c Florent Chuffart
			  }
609 6b5a528c Florent Chuffart
		  }
610 6b5a528c Florent Chuffart
		}
611 5bfac5a3 Florent Chuffart
612 67157457 Florent Chuffart
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
613 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
614 67157457 Florent Chuffart
      to_remove_list = c()
615 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
616 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
617 67157457 Florent Chuffart
          to_remove = index_redundant[i]
618 67157457 Florent Chuffart
        }   else {
619 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
620 67157457 Florent Chuffart
        }
621 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
622 67157457 Florent Chuffart
      }
623 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
624 67157457 Florent Chuffart
    }
625 5bfac5a3 Florent Chuffart
626 67157457 Florent Chuffart
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
627 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
628 67157457 Florent Chuffart
      to_remove_list = c()
629 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
630 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
631 67157457 Florent Chuffart
          to_remove = index_redundant[i]
632 67157457 Florent Chuffart
        }   else {
633 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
634 67157457 Florent Chuffart
        }
635 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
636 67157457 Florent Chuffart
      }
637 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
638 67157457 Florent Chuffart
    }
639 5bfac5a3 Florent Chuffart
640 7e2d37e1 Florent Chuffart
		return(list(common_nuc, llr_scores))
641 6b5a528c Florent Chuffart
	} else {
642 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
643 6b5a528c Florent Chuffart
		return(NULL)
644 6b5a528c Florent Chuffart
	}
645 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
646 5bfac5a3 Florent Chuffart
}, ex=function(){
647 1d833b97 Florent Chuffart
648 4016229d Florent Chuffart
    # Define new translate_cur function...
649 4016229d Florent Chuffart
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
650 1d833b97 Florent Chuffart
      return(roi)
651 1d833b97 Florent Chuffart
    }
652 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
653 4016229d Florent Chuffart
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
654 4016229d Florent Chuffart
    unlockBinding("translate_cur", getNamespace("nucleominer"))
655 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, "package:nucleominer")
656 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
657 4016229d Florent Chuffart
    lockBinding("translate_cur", getNamespace("nucleominer"))
658 4016229d Florent Chuffart
    lockBinding("translate_cur", as.environment("package:nucleominer"))
659 1d833b97 Florent Chuffart
660 6b5a528c Florent Chuffart
	# Dealing with a region of interest
661 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
662 4016229d Florent Chuffart
	roi2 = translate_cur(roi, roi$strain_ref1)
663 6b5a528c Florent Chuffart
	replicates = list()
664 6b5a528c Florent Chuffart
	for (j in 1:2) {
665 6b5a528c Florent Chuffart
		samples = list()
666 6b5a528c Florent Chuffart
		for (i in 1:3) {
667 6b5a528c Florent Chuffart
			# Create TF output
668 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
669 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
670 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
671 6b5a528c Florent Chuffart
			# Generate corresponding reads
672 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
673 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
674 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
675 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
676 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
677 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
678 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
679 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
680 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
681 55c1cdff Florent Chuffart
															 "V3" = strands,
682 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
683 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
684 6b5a528c Florent Chuffart
		}
685 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
686 6b5a528c Florent Chuffart
	}
687 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
688 6b5a528c Florent Chuffart
})
689 6b5a528c Florent Chuffart
690 6b5a528c Florent Chuffart
691 6b5a528c Florent Chuffart
692 6b5a528c Florent Chuffart
693 6b5a528c Florent Chuffart
694 6b5a528c Florent Chuffart
695 6b5a528c Florent Chuffart
696 6b5a528c Florent Chuffart
697 6b5a528c Florent Chuffart
698 6b5a528c Florent Chuffart
699 6b5a528c Florent Chuffart
700 6b5a528c Florent Chuffart
701 6b5a528c Florent Chuffart
702 6b5a528c Florent Chuffart
703 6b5a528c Florent Chuffart
704 6b5a528c Florent Chuffart
705 6b5a528c Florent Chuffart
706 6b5a528c Florent Chuffart
707 6b5a528c Florent Chuffart
708 6b5a528c Florent Chuffart
709 6b5a528c Florent Chuffart
710 6b5a528c Florent Chuffart
711 6b5a528c Florent Chuffart
712 6b5a528c Florent Chuffart
713 6b5a528c Florent Chuffart
714 6b5a528c Florent Chuffart
715 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
716 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
717 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
718 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
719 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
720 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
721 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
722 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
723 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
724 6b5a528c Florent Chuffart
) {
725 6b5a528c Florent Chuffart
	samples=list()
726 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
727 6b5a528c Florent Chuffart
	for (i in samples_ids) {
728 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
729 a0b91fee Florent Chuffart
    sample$orig_roi = roi
730 4016229d Florent Chuffart
    sample$roi = translate_cur(roi, sample$strain, config = config)
731 6b5a528c Florent Chuffart
		if (get_genome) {
732 6b5a528c Florent Chuffart
			# Get Genome
733 5bfac5a3 Florent Chuffart
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
734 6b5a528c Florent Chuffart
		}
735 6b5a528c Florent Chuffart
		# Get inputs
736 55c1cdff Florent Chuffart
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
737 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
738 6b5a528c Florent Chuffart
		if (!only_fetch) {
739 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
740 6b5a528c Florent Chuffart
	  }
741 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
742 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
743 a52f85d7 Florent Chuffart
			sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t")
744 6b5a528c Florent Chuffart
			if (!only_fetch) {
745 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
746 6b5a528c Florent Chuffart
  		}
747 6b5a528c Florent Chuffart
		}
748 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
749 6b5a528c Florent Chuffart
	}
750 6b5a528c Florent Chuffart
  return(samples)
751 6b5a528c Florent Chuffart
}
752 6b5a528c Florent Chuffart
753 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
754 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
755 55c1cdff Florent Chuffart
region1, ##<< Original regions.
756 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
757 6b5a528c Florent Chuffart
) {
758 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
759 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
760 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
761 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
762 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
763 6b5a528c Florent Chuffart
      ret = c()
764 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
765 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
766 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
767 6b5a528c Florent Chuffart
          # remove cur_fuzzy
768 6b5a528c Florent Chuffart
          ret = c()
769 6b5a528c Florent Chuffart
          break
770 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
771 55c1cdff Florent Chuffart
          # crop fuzzy
772 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
773 6b5a528c Florent Chuffart
          ret = cur_fuzzy
774 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
775 55c1cdff Florent Chuffart
          # crop fuzzy
776 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
777 6b5a528c Florent Chuffart
          ret = cur_fuzzy
778 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
779 6b5a528c Florent Chuffart
          # split fuzzy
780 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
781 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
782 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
783 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
784 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
785 6b5a528c Florent Chuffart
          # print(ret)
786 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
787 6b5a528c Florent Chuffart
          break
788 6b5a528c Florent Chuffart
        } else {
789 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
790 55c1cdff Florent Chuffart
        }
791 6b5a528c Florent Chuffart
      }
792 6b5a528c Florent Chuffart
      return(ret)
793 6b5a528c Florent Chuffart
    } else {
794 6b5a528c Florent Chuffart
      return(cur_fuzzy)
795 6b5a528c Florent Chuffart
    }
796 6b5a528c Florent Chuffart
  })
797 6b5a528c Florent Chuffart
  }
798 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
799 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
800 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
801 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
802 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
803 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
804 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
805 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
806 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
807 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
808 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
809 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
810 6b5a528c Florent Chuffart
}
811 6b5a528c Florent Chuffart
812 6b5a528c Florent Chuffart
union_regions = function(# Aggregate regions that intersect themnselves.
813 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
814 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
815 6b5a528c Florent Chuffart
) {
816 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
817 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
818 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
819 6b5a528c Florent Chuffart
  new_length = 0
820 55c1cdff Florent Chuffart
821 6b5a528c Florent Chuffart
  while (old_length != new_length) {
822 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
823 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
824 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
825 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
826 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
827 55c1cdff Florent Chuffart
    }
828 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
829 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
830 6b5a528c Florent Chuffart
    }
831 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
832 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
833 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
834 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
835 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
836 4016229d Florent Chuffart
      cur_index = regions$cur_index[vec_beg_1[i]]
837 4016229d Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
838 6b5a528c Florent Chuffart
      })
839 59ad95ca Florent Chuffart
    union = collapse_regions(union)
840 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
841 6b5a528c Florent Chuffart
    new_length = length(union[,1])
842 6b5a528c Florent Chuffart
    regions = union
843 6b5a528c Florent Chuffart
  }
844 6b5a528c Florent Chuffart
  return(union)
845 6b5a528c Florent Chuffart
}
846 6b5a528c Florent Chuffart
847 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
848 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
849 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
850 4016229d Florent Chuffart
# cur_index, ##<< The region of interest index.
851 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
852 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
853 0303dbe8 Florent Chuffart
# ){
854 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
855 4016229d Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
856 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
857 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
858 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
859 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
860 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
861 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
862 0303dbe8 Florent Chuffart
# }
863 6b5a528c Florent Chuffart
864 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
865 4016229d Florent Chuffart
### This function is an eloborated call to translate_cur.
866 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
867 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
868 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
869 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
870 6b5a528c Florent Chuffart
roi ##<< The region of interest.
871 6b5a528c Florent Chuffart
) {
872 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
873 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
874 4016229d Florent Chuffart
    big_cur =  roi
875 4016229d Florent Chuffart
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
876 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
877 5bfac5a3 Florent Chuffart
      return(NULL)
878 5bfac5a3 Florent Chuffart
    } else {
879 4016229d Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
880 5bfac5a3 Florent Chuffart
    }
881 5bfac5a3 Florent Chuffart
  })
882 5bfac5a3 Florent Chuffart
883 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
884 59ad95ca Florent Chuffart
}
885 59ad95ca Florent Chuffart
886 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
887 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
888 59ad95ca Florent Chuffart
regions ##< a list of regions
889 59ad95ca Florent Chuffart
) {
890 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
891 5bfac5a3 Florent Chuffart
    return(NULL)
892 5bfac5a3 Florent Chuffart
  } else {
893 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
894 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
895 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
896 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
897 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
898 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
899 5bfac5a3 Florent Chuffart
    return(regions)
900 5bfac5a3 Florent Chuffart
  }
901 6b5a528c Florent Chuffart
}
902 6b5a528c Florent Chuffart
903 6b5a528c Florent Chuffart
904 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
905 4016229d Florent Chuffart
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
906 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
907 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
908 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
909 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
910 6b5a528c Florent Chuffart
) {
911 4016229d Florent Chuffart
  tr_roi = translate_cur(roi, strain, config = config)
912 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
913 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
914 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
915 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
916 6b5a528c Florent Chuffart
  }
917 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
918 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
919 6b5a528c Florent Chuffart
  }
920 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
921 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
922 6b5a528c Florent Chuffart
  }
923 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
924 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
925 6b5a528c Florent Chuffart
  }
926 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
927 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
928 6b5a528c Florent Chuffart
}
929 6b5a528c Florent Chuffart
930 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
931 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
932 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
933 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
934 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
935 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
936 6b5a528c Florent Chuffart
) {
937 6b5a528c Florent Chuffart
	all_reads = NULL
938 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
939 4016229d Florent Chuffart
    if (form == "unr") {
940 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
941 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
942 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
943 6b5a528c Florent Chuffart
      tmp_res$form = form
944 6b5a528c Florent Chuffart
    } else if (form == "wp") {
945 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
946 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
947 6b5a528c Florent Chuffart
      tmp_res$form = form
948 4016229d Florent Chuffart
    } else if (form == "wpunr") {
949 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
950 5bfac5a3 Florent Chuffart
  		tmp_res = read.table(file=out_filename, header=TRUE)
951 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
952 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
953 5bfac5a3 Florent Chuffart
  		tmp_res2 = read.table(file=out_filename, header=TRUE)
954 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
955 4016229d Florent Chuffart
      tmp_res2$form = "unr"
956 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
957 6b5a528c Florent Chuffart
    }
958 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
959 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
960 55c1cdff Florent Chuffart
		}
961 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
962 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
963 6b5a528c Florent Chuffart
  }
964 5bfac5a3 Florent Chuffart
  return(all_reads)
965 6b5a528c Florent Chuffart
}
966 6b5a528c Florent Chuffart
967 6b5a528c Florent Chuffart
get_design = function(# Build the design for deseq
968 6b5a528c Florent Chuffart
### This function build the design according sample properties.
969 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
970 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
971 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
972 6b5a528c Florent Chuffart
) {
973 6b5a528c Florent Chuffart
  off1 = 0
974 6b5a528c Florent Chuffart
  off2 = 0
975 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
976 6b5a528c Florent Chuffart
	design_rownames = c()
977 6b5a528c Florent Chuffart
	design_manip = c()
978 6b5a528c Florent Chuffart
	design_strain = c()
979 5bfac5a3 Florent Chuffart
  off2index = function(off) {
980 55c1cdff Florent Chuffart
  	switch(toString(off),
981 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
982 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
983 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
984 6b5a528c Florent Chuffart
  		c(1,1,1)
985 5bfac5a3 Florent Chuffart
  		)
986 6b5a528c Florent Chuffart
  }
987 6b5a528c Florent Chuffart
	for (manip in manips) {
988 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
989 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
990 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
991 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
992 6b5a528c Florent Chuffart
		} else {
993 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
994 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
995 6b5a528c Florent Chuffart
			}
996 6b5a528c Florent Chuffart
		}
997 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
998 5bfac5a3 Florent Chuffart
		for (strain in combi) {
999 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1000 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1001 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1002 6b5a528c Florent Chuffart
		}
1003 6b5a528c Florent Chuffart
	}
1004 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1005 5bfac5a3 Florent Chuffart
	return(snep_design)
1006 6b5a528c Florent Chuffart
}
1007 6b5a528c Florent Chuffart
1008 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1009 6b5a528c Florent Chuffart
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1010 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1011 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1012 6b5a528c Florent Chuffart
res, ##<< Data
1013 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1014 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1015 5bfac5a3 Florent Chuffart
) {
1016 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1017 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1018 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1019 6b5a528c Florent Chuffart
		row.names = cols,
1020 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1021 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1022 6b5a528c Florent Chuffart
		)
1023 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1024 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1025 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1026 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1027 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1028 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1029 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1030 6b5a528c Florent Chuffart
	}
1031 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1032 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1033 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1034 6b5a528c Florent Chuffart
	}
1035 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1036 6b5a528c Florent Chuffart
}
1037 6b5a528c Florent Chuffart
1038 6b5a528c Florent Chuffart
analyse_design = function(# Launch deseq methods.
1039 6b5a528c Florent Chuffart
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1040 6b5a528c Florent Chuffart
snep_design, ##<< The design to considere.
1041 55c1cdff Florent Chuffart
reads ##<< The data to considere.
1042 6b5a528c Florent Chuffart
) {
1043 6b5a528c Florent Chuffart
	snep_count_table = reads[, rownames(snep_design)]
1044 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1045 6b5a528c Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1046 6b5a528c Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1047 6b5a528c Florent Chuffart
1048 6b5a528c Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1049 6b5a528c Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1050 6b5a528c Florent Chuffart
1051 6b5a528c Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1052 5bfac5a3 Florent Chuffart
	return(list(fit1, fit0, snep_design, pvalsGLM))
1053 6b5a528c Florent Chuffart
}
1054 6b5a528c Florent Chuffart
1055 6b5a528c Florent Chuffart
1056 6b5a528c Florent Chuffart
1057 6b5a528c Florent Chuffart
1058 6b5a528c Florent Chuffart
1059 6b5a528c Florent Chuffart
1060 6b5a528c Florent Chuffart
1061 6b5a528c Florent Chuffart
1062 6b5a528c Florent Chuffart
get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1063 55c1cdff Florent Chuffart
### This function uses
1064 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1065 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1066 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1067 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1068 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1069 6b5a528c Florent Chuffart
) {
1070 6b5a528c Florent Chuffart
  # PRETREAT
1071 4016229d Florent Chuffart
  snep_design = get_design(marker, combi, all_samples)
1072 cc54c799 Florent Chuffart
  reads = get_all_reads(marker, combi, form, config=config)
1073 6b5a528c Florent Chuffart
  # RUN ANALYSE
1074 4016229d Florent Chuffart
  tmp_analyse = analyse_design(snep_design, reads)
1075 6b5a528c Florent Chuffart
  # RESULTS
1076 6b5a528c Florent Chuffart
	fit1 = tmp_analyse[[1]]
1077 6b5a528c Florent Chuffart
	fit0 = tmp_analyse[[2]]
1078 6b5a528c Florent Chuffart
  k = names(fit1)
1079 6b5a528c Florent Chuffart
  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1080 6b5a528c Florent Chuffart
  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1081 6b5a528c Florent Chuffart
  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1082 6b5a528c Florent Chuffart
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1083 6b5a528c Florent Chuffart
	snep_design = tmp_analyse[[3]]
1084 4016229d Florent Chuffart
  # print(snep_design)
1085 6b5a528c Florent Chuffart
	fdr = 0.0001
1086 55c1cdff Florent Chuffart
	thres = FDR(reads$pvalsGLM, fdr)
1087 6b5a528c Florent Chuffart
	reads$snep_index = reads$pvalsGLM < thres
1088 6b5a528c Florent Chuffart
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1089 6b5a528c Florent Chuffart
  return(reads)
1090 5bfac5a3 Florent Chuffart
  },  ex=function(){
1091 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1092 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1093 4016229d Florent Chuffart
    form = "wpunr" # "wp" | "unr" | "wpunr"
1094 6b5a528c Florent Chuffart
    # foo = get_sneps(marker, combi, form)
1095 6b5a528c Florent Chuffart
    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
1096 6b5a528c Florent Chuffart
})
1097 6b5a528c Florent Chuffart
1098 6b5a528c Florent Chuffart
1099 6b5a528c Florent Chuffart
1100 6b5a528c Florent Chuffart
1101 6b5a528c Florent Chuffart
1102 6b5a528c Florent Chuffart
1103 6b5a528c Florent Chuffart
1104 6b5a528c Florent Chuffart
1105 6b5a528c Florent Chuffart
1106 6b5a528c Florent Chuffart
1107 6b5a528c Florent Chuffart
1108 6b5a528c Florent Chuffart
1109 6b5a528c Florent Chuffart
1110 6b5a528c Florent Chuffart
1111 6b5a528c Florent Chuffart
1112 6b5a528c Florent Chuffart
1113 6b5a528c Florent Chuffart
1114 6b5a528c Florent Chuffart
1115 6b5a528c Florent Chuffart
1116 6b5a528c Florent Chuffart
1117 6b5a528c Florent Chuffart
1118 6b5a528c Florent Chuffart
1119 6b5a528c Florent Chuffart
1120 6b5a528c Florent Chuffart
1121 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1122 1d833b97 Florent Chuffart
### Util to convert Roman to Arabic
1123 1d833b97 Florent Chuffart
){list(
1124 1d833b97 Florent Chuffart
  "I" = 1,
1125 1d833b97 Florent Chuffart
  "II" = 2,
1126 1d833b97 Florent Chuffart
  "III" = 3,
1127 1d833b97 Florent Chuffart
  "IV" = 4,
1128 1d833b97 Florent Chuffart
  "V" = 5,
1129 1d833b97 Florent Chuffart
  "VI" = 6,
1130 1d833b97 Florent Chuffart
  "VII" = 7,
1131 1d833b97 Florent Chuffart
  "VIII" = 8,
1132 1d833b97 Florent Chuffart
  "IX" = 9,
1133 1d833b97 Florent Chuffart
  "X" = 10,
1134 1d833b97 Florent Chuffart
  "XI" = 11,
1135 1d833b97 Florent Chuffart
  "XII" = 12,
1136 1d833b97 Florent Chuffart
  "XIII" = 13,
1137 1d833b97 Florent Chuffart
  "XIV" = 14,
1138 1d833b97 Florent Chuffart
  "XV" = 15,
1139 1d833b97 Florent Chuffart
  "XVI" = 16,
1140 1d833b97 Florent Chuffart
  "XVII" = 17,
1141 1d833b97 Florent Chuffart
  "XVIII" = 18,
1142 1d833b97 Florent Chuffart
  "XIX" = 19,
1143 1d833b97 Florent Chuffart
  "XX" = 20
1144 1d833b97 Florent Chuffart
)}
1145 1d833b97 Florent Chuffart
1146 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1147 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1148 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1149 1d833b97 Florent Chuffart
) {
1150 1d833b97 Florent Chuffart
	ret = list()
1151 1d833b97 Florent Chuffart
	for (name in names(l)) {
1152 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1153 1d833b97 Florent Chuffart
	}
1154 1d833b97 Florent Chuffart
	ret
1155 1d833b97 Florent Chuffart
### The switched pairlist.
1156 1d833b97 Florent Chuffart
}, ex=function(){
1157 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1158 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1159 1d833b97 Florent Chuffart
})
1160 1d833b97 Florent Chuffart
1161 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1162 1d833b97 Florent Chuffart
### Util to convert Arabicto Roman
1163 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1164 1d833b97 Florent Chuffart
1165 1d833b97 Florent Chuffart
1166 1d833b97 Florent Chuffart
1167 1d833b97 Florent Chuffart
1168 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1169 5bfac5a3 Florent Chuffart
### This fonction allow to acces to a specific part of the c2c file.
1170 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1171 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1172 5bfac5a3 Florent Chuffart
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1173 5bfac5a3 Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1174 5bfac5a3 Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1175 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1176 1d833b97 Florent Chuffart
) {
1177 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1178 1d833b97 Florent Chuffart
	# Launch c2c file
1179 1d833b97 Florent Chuffart
	if (reverse) {
1180 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1181 1d833b97 Florent Chuffart
	} else {
1182 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1183 1d833b97 Florent Chuffart
	}
1184 5bfac5a3 Florent Chuffart
	c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE)
1185 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1186 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1187 1d833b97 Florent Chuffart
	# Reverse
1188 1d833b97 Florent Chuffart
	if (reverse) {
1189 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1190 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1191 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1192 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1193 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1194 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1195 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1196 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1197 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1198 1d833b97 Florent Chuffart
	}
1199 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1200 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1201 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1202 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1203 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1204 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1205 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1206 5bfac5a3 Florent Chuffart
  	}
1207 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1208 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1209 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1210 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1211 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1212 5bfac5a3 Florent Chuffart
    }
1213 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1214 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1215 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1216 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1217 5bfac5a3 Florent Chuffart
    }
1218 5bfac5a3 Florent Chuffart
  }
1219 5bfac5a3 Florent Chuffart
  return(c2c)
1220 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1221 5bfac5a3 Florent Chuffart
}
1222 5bfac5a3 Florent Chuffart
1223 4016229d Florent Chuffart
translate_cur = structure(function(# Translate coords of a genome region.
1224 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1225 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1226 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1227 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1228 4016229d Florent Chuffart
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1229 5bfac5a3 Florent Chuffart
) {
1230 5bfac5a3 Florent Chuffart
	strain1 = roi$strain_ref
1231 5bfac5a3 Florent Chuffart
	if (strain1 == strain2) {
1232 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1233 5bfac5a3 Florent Chuffart
		return(roi)
1234 5bfac5a3 Florent Chuffart
	}
1235 5bfac5a3 Florent Chuffart
1236 5bfac5a3 Florent Chuffart
	# Extract c2c file
1237 4016229d Florent Chuffart
	if (!is.null(big_cur)) {
1238 4016229d Florent Chuffart
  	# Dealing with big_cur
1239 4016229d Florent Chuffart
		if (roi$strain_ref != big_cur$strain_ref) {
1240 4016229d Florent Chuffart
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1241 62d69d53 Florent Chuffart
    }
1242 4016229d Florent Chuffart
    if (big_cur$end < big_cur$begin) {
1243 4016229d Florent Chuffart
      tmp_var = big_cur$begin
1244 4016229d Florent Chuffart
      big_cur$begin = big_cur$end
1245 4016229d Florent Chuffart
      big_cur$end = tmp_var
1246 4016229d Florent Chuffart
      big_cur$length = big_cur$end - big_cur$begin + 1
1247 55c1cdff Florent Chuffart
    }
1248 4016229d Florent Chuffart
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1249 4016229d Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1250 ded736c8 Florent Chuffart
      return(NULL)
1251 ded736c8 Florent Chuffart
    }
1252 4016229d Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1253 5bfac5a3 Florent Chuffart
  } else {
1254 4016229d Florent Chuffart
    # No big_cur
1255 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1256 5bfac5a3 Florent Chuffart
  }
1257 5bfac5a3 Florent Chuffart
  
1258 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1259 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1260 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1261 1d833b97 Florent Chuffart
  end_1 = roi$end
1262 5bfac5a3 Florent Chuffart
  if (reverse) {
1263 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1264 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1265 1d833b97 Florent Chuffart
	} else {
1266 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1267 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1268 1d833b97 Florent Chuffart
	}
1269 5bfac5a3 Florent Chuffart
1270 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1271 1d833b97 Florent Chuffart
	{
1272 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1273 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1274 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1275 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1276 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1277 54cc8283 Florent Chuffart
      } else {
1278 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1279 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1280 54cc8283 Florent Chuffart
      }
1281 1d833b97 Florent Chuffart
			if (reverse) {
1282 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1283 1d833b97 Florent Chuffart
			} else {
1284 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1285 1d833b97 Florent Chuffart
			}
1286 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1287 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1288 1d833b97 Florent Chuffart
					return(NULL)
1289 4016229d Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1290 55c1cdff Florent Chuffart
				} else {
1291 1d833b97 Florent Chuffart
					print(tmptransfostart)
1292 1d833b97 Florent Chuffart
					print(tmptransfostop)
1293 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1294 1d833b97 Florent Chuffart
				}
1295 1d833b97 Florent Chuffart
			}
1296 55c1cdff Florent Chuffart
		}
1297 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1298 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1299 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1300 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1301 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1302 54cc8283 Florent Chuffart
      } else {
1303 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1304 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1305 54cc8283 Florent Chuffart
      }
1306 1d833b97 Florent Chuffart
			if (reverse) {
1307 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1308 1d833b97 Florent Chuffart
			} else {
1309 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1310 1d833b97 Florent Chuffart
			}
1311 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1312 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1313 1d833b97 Florent Chuffart
					return(NULL)
1314 4016229d Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1315 1d833b97 Florent Chuffart
				} else {
1316 1d833b97 Florent Chuffart
					print(tmptransfostart)
1317 1d833b97 Florent Chuffart
					print(tmptransfostop)
1318 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1319 55c1cdff Florent Chuffart
				}
1320 55c1cdff Florent Chuffart
			}
1321 1d833b97 Florent Chuffart
		}
1322 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1323 1d833b97 Florent Chuffart
			# print("many start")
1324 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1325 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1326 1d833b97 Florent Chuffart
				print(tmptransfostart)
1327 1d833b97 Florent Chuffart
				print(tmptransfostop)
1328 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1329 1d833b97 Florent Chuffart
			}
1330 1d833b97 Florent Chuffart
		}
1331 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1332 1d833b97 Florent Chuffart
			# print("many stop")
1333 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1334 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1335 1d833b97 Florent Chuffart
				print(tmptransfostart)
1336 1d833b97 Florent Chuffart
				print(tmptransfostop)
1337 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1338 1d833b97 Florent Chuffart
			}
1339 5bfac5a3 Florent Chuffart
		}
1340 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1341 1d833b97 Florent Chuffart
			print(tmptransfostart)
1342 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1343 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1344 1d833b97 Florent Chuffart
		}
1345 55c1cdff Florent Chuffart
	}
1346 5bfac5a3 Florent Chuffart
  
1347 1d833b97 Florent Chuffart
  # Deal with strand
1348 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1349 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1350 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1351 1d833b97 Florent Chuffart
  } else {
1352 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1353 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1354 1d833b97 Florent Chuffart
  }
1355 1d833b97 Florent Chuffart
	# Build returned roi
1356 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1357 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1358 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1359 1d833b97 Florent Chuffart
	} else {
1360 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1361 1d833b97 Florent Chuffart
	}
1362 1d833b97 Florent Chuffart
  roi$begin = begin_2
1363 1d833b97 Florent Chuffart
  roi$end = end_2
1364 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1365 5bfac5a3 Florent Chuffart
		roi$length = 1
1366 5bfac5a3 Florent Chuffart
	} else {
1367 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1368 1d833b97 Florent Chuffart
	}
1369 55c1cdff Florent Chuffart
  return(roi)
1370 1d833b97 Florent Chuffart
}, ex=function(){
1371 4016229d Florent Chuffart
	# Define new translate_cur function...
1372 4016229d Florent Chuffart
	translate_cur = function(roi, strain2, config) {
1373 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1374 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1375 1d833b97 Florent Chuffart
			return(roi)
1376 1d833b97 Florent Chuffart
		} else {
1377 4016229d Florent Chuffart
		  stop("Here is my new translate_cur function...")
1378 5bfac5a3 Florent Chuffart
		}
1379 1d833b97 Florent Chuffart
	}
1380 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1381 4016229d Florent Chuffart
	# unlockBinding("translate_cur", as.environment("package:nm"))
1382 4016229d Florent Chuffart
	# unlockBinding("translate_cur", getNamespace("nm"))
1383 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, "package:nm")
1384 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1385 4016229d Florent Chuffart
	# lockBinding("translate_cur", getNamespace("nm"))
1386 4016229d Florent Chuffart
	# lockBinding("translate_cur", as.environment("package:nm"))
1387 1d833b97 Florent Chuffart
})
1388 1d833b97 Florent Chuffart
1389 1d833b97 Florent Chuffart
1390 5bfac5a3 Florent Chuffart
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1391 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1392 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1393 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1394 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1395 5bfac5a3 Florent Chuffart
) {
1396 1d833b97 Florent Chuffart
1397 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1398 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1399 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1400 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1401 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1402 5bfac5a3 Florent Chuffart
    } else {
1403 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1404 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1405 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1406 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1407 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1408 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1409 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1410 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1411 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1412 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1413 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1414 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1415 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1416 5bfac5a3 Florent Chuffart
      }
1417 5bfac5a3 Florent Chuffart
      return(NULL)
1418 5bfac5a3 Florent Chuffart
    }
1419 5bfac5a3 Florent Chuffart
  }
1420 1d833b97 Florent Chuffart
1421 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1422 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1423 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1424 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1425 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1426 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1427 5bfac5a3 Florent Chuffart
  	}
1428 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1429 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1430 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1431 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1432 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1433 5bfac5a3 Florent Chuffart
  	}
1434 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1435 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1436 5bfac5a3 Florent Chuffart
  	}
1437 5bfac5a3 Florent Chuffart
  }
1438 1d833b97 Florent Chuffart
1439 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1440 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1441 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1442 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1443 5bfac5a3 Florent Chuffart
      if (!ok) {
1444 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1445 5bfac5a3 Florent Chuffart
      }
1446 5bfac5a3 Florent Chuffart
      return (ok)
1447 5bfac5a3 Florent Chuffart
    }
1448 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1449 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1450 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1451 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1452 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1453 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1454 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1455 5bfac5a3 Florent Chuffart
        return(roi)
1456 5bfac5a3 Florent Chuffart
      })
1457 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1458 5bfac5a3 Florent Chuffart
    }
1459 5bfac5a3 Florent Chuffart
    # foo_orig = compute_inter_all_strain_curs2(config=config)
1460 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1461 5bfac5a3 Florent Chuffart
    STOP = FALSE
1462 5bfac5a3 Florent Chuffart
    nb_round = 0
1463 5bfac5a3 Florent Chuffart
    while(!STOP) {
1464 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1465 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1466 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1467 4016229d Florent Chuffart
      fooyjm = translate_curs(foo, "YJM", config=config)
1468 4016229d Florent Chuffart
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1469 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1470 5bfac5a3 Florent Chuffart
        print("case 1")
1471 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1472 5bfac5a3 Florent Chuffart
    		next
1473 5bfac5a3 Florent Chuffart
      }
1474 4016229d Florent Chuffart
      foorm = translate_curs(foo, "RM", config=config)
1475 4016229d Florent Chuffart
      foormby = translate_curs(foorm, "BY", config=config)
1476 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1477 5bfac5a3 Florent Chuffart
        print("case 2")
1478 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1479 5bfac5a3 Florent Chuffart
    		next
1480 5bfac5a3 Florent Chuffart
      }
1481 4016229d Florent Chuffart
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1482 4016229d Florent Chuffart
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1483 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1484 5bfac5a3 Florent Chuffart
        print("case 3")
1485 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1486 5bfac5a3 Florent Chuffart
        next
1487 5bfac5a3 Florent Chuffart
      }
1488 4016229d Florent Chuffart
      foormyjm = translate_curs(foorm, "YJM", config=config)
1489 4016229d Florent Chuffart
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1490 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1491 5bfac5a3 Florent Chuffart
        print("case 4")
1492 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1493 5bfac5a3 Florent Chuffart
        next
1494 5bfac5a3 Florent Chuffart
      }
1495 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1496 5bfac5a3 Florent Chuffart
      STOP = TRUE
1497 5bfac5a3 Florent Chuffart
    }
1498 5bfac5a3 Florent Chuffart
    STOP = FALSE
1499 5bfac5a3 Florent Chuffart
    nb_round = 0
1500 5bfac5a3 Florent Chuffart
    while(!STOP) {
1501 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1502 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1503 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1504 4016229d Florent Chuffart
      foobyrm = translate_curs(fooby, "RM", config=config)
1505 4016229d Florent Chuffart
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1506 4016229d Florent Chuffart
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1507 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1508 5bfac5a3 Florent Chuffart
        print("case 1")
1509 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1510 5bfac5a3 Florent Chuffart
      }
1511 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1512 4016229d Florent Chuffart
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1513 4016229d Florent Chuffart
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1514 4016229d Florent Chuffart
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1515 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1516 5bfac5a3 Florent Chuffart
        print("case 2")
1517 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1518 5bfac5a3 Florent Chuffart
        next
1519 5bfac5a3 Florent Chuffart
      }
1520 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1521 5bfac5a3 Florent Chuffart
      STOP = TRUE
1522 5bfac5a3 Florent Chuffart
    }
1523 5bfac5a3 Florent Chuffart
    print("end")
1524 5bfac5a3 Florent Chuffart
    return(foo)
1525 5bfac5a3 Florent Chuffart
  }
1526 1d833b97 Florent Chuffart
1527 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1528 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1529 5bfac5a3 Florent Chuffart
    # computing diffs
1530 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1531 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1532 1d833b97 Florent Chuffart
    # Filtering
1533 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1534 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1535 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1536 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1537 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1538 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1539 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1540 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1541 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1542 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1543 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1544 5bfac5a3 Florent Chuffart
  		} else {
1545 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1546 1d833b97 Florent Chuffart
  		}
1547 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1548 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1549 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1550 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1551 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1552 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1553 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1554 5bfac5a3 Florent Chuffart
          # print(check)
1555 5bfac5a3 Florent Chuffart
      		roi1 = roi
1556 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1557 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1558 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1559 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1560 5bfac5a3 Florent Chuffart
          }
1561 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1562 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1563 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1564 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1565 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1566 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1567 5bfac5a3 Florent Chuffart
          } else {
1568 5bfac5a3 Florent Chuffart
            check = NULL
1569 5bfac5a3 Florent Chuffart
            roi = NULL
1570 5bfac5a3 Florent Chuffart
          }
1571 5bfac5a3 Florent Chuffart
        }
1572 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1573 1d833b97 Florent Chuffart
  	  }
1574 5bfac5a3 Florent Chuffart
    })
1575 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1576 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1577 1d833b97 Florent Chuffart
  	return(rois)
1578 1d833b97 Florent Chuffart
  }
1579 1d833b97 Florent Chuffart
1580 4016229d Florent Chuffart
  translate_curs = function(rois, target_strain, config) {
1581 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1582 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1583 4016229d Florent Chuffart
      tr_roi = translate_cur(roi, target_strain, config=config)  
1584 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1585 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1586 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1587 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1588 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1589 5bfac5a3 Florent Chuffart
      return(tr_roi)
1590 5bfac5a3 Florent Chuffart
    })
1591 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1592 5bfac5a3 Florent Chuffart
    return(tr_rois)
1593 5bfac5a3 Florent Chuffart
  }
1594 729c934e Florent Chuffart
1595 5bfac5a3 Florent Chuffart
  combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"))
1596 5bfac5a3 Florent Chuffart
  rois = list()
1597 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1598 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1599 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1600 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1601 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1602 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1603 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1604 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1605 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1606 4016229d Florent Chuffart
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1607 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1608 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1609 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1610 5bfac5a3 Florent Chuffart
  }
1611 5bfac5a3 Florent Chuffart
  reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1612 5bfac5a3 Florent Chuffart
  reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1613 4016229d Florent Chuffart
  tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1614 5bfac5a3 Florent Chuffart
  reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1615 5bfac5a3 Florent Chuffart
  reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1616 4016229d Florent Chuffart
  reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1617 5bfac5a3 Florent Chuffart
  reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1618 5bfac5a3 Florent Chuffart
  squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1619 5bfac5a3 Florent Chuffart
  return (squeezed_rois)
1620 1d833b97 Florent Chuffart
}
1621 1d833b97 Florent Chuffart
1622 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1623 5bfac5a3 Florent Chuffart
### This function...
1624 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1625 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1626 5bfac5a3 Florent Chuffart
) {
1627 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1628 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1629 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1630 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1631 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1632 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1633 5bfac5a3 Florent Chuffart
                                , ]
1634 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1635 5bfac5a3 Florent Chuffart
      print("removing a region")
1636 5bfac5a3 Florent Chuffart
      return(NULL)
1637 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1638 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1639 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1640 5bfac5a3 Florent Chuffart
    } else {
1641 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1642 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1643 5bfac5a3 Florent Chuffart
        print("not the same begin")
1644 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1645 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1646 5bfac5a3 Florent Chuffart
      }
1647 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1648 5bfac5a3 Florent Chuffart
        print("not the same end")
1649 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1650 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1651 5bfac5a3 Florent Chuffart
      }
1652 5bfac5a3 Florent Chuffart
      return(roi1)
1653 5bfac5a3 Florent Chuffart
    }         
1654 5bfac5a3 Florent Chuffart
  })
1655 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1656 5bfac5a3 Florent Chuffart
}
1657 1d833b97 Florent Chuffart
1658 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1659 61914235 Florent Chuffart
### This function loads in memory data corresponding to the given experiments.
1660 61914235 Florent Chuffart
expe, ##<< a list of vector corresponding to vector of replicates.
1661 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1662 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1663 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1664 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1665 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1666 61914235 Florent Chuffart
) {
1667 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1668 61914235 Florent Chuffart
  	samples=list()
1669 61914235 Florent Chuffart
  	for (i in samples_ids) {
1670 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1671 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1672 4016229d Florent Chuffart
      sample$roi = translate_cur(roi, sample$strain, config = config)
1673 61914235 Florent Chuffart
  		if (get_genome) {
1674 61914235 Florent Chuffart
  			# Get Genome
1675 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1676 5bfac5a3 Florent Chuffart
  			sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1677 61914235 Florent Chuffart
  		}
1678 61914235 Florent Chuffart
  		# Get inputs
1679 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1680 55c1cdff Florent Chuffart
  		sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE)
1681 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
1682 61914235 Florent Chuffart
  		if (!only_fetch) {
1683 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
1684 61914235 Florent Chuffart
  	  }
1685 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
1686 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1687 61914235 Florent Chuffart
  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
1688 61914235 Florent Chuffart
  			sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t")
1689 61914235 Florent Chuffart
  			if (!only_fetch) {
1690 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1691 61914235 Florent Chuffart
    		}
1692 61914235 Florent Chuffart
  		}
1693 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
1694 61914235 Florent Chuffart
  	}
1695 61914235 Florent Chuffart
  	return(samples)
1696 61914235 Florent Chuffart
  }
1697 61914235 Florent Chuffart
	replicates = list()
1698 61914235 Florent Chuffart
	for(samples_ids in expe) {
1699 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
1700 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
1701 61914235 Florent Chuffart
	}
1702 61914235 Florent Chuffart
	return(replicates)
1703 61914235 Florent Chuffart
  }, ex = function() {
1704 61914235 Florent Chuffart
    # library(rjson)
1705 61914235 Florent Chuffart
    # library(nucleominer)
1706 55c1cdff Florent Chuffart
    #
1707 61914235 Florent Chuffart
    # # Read config file
1708 61914235 Florent Chuffart
    # json_conf_file = "nucleo_miner_config.json"
1709 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1710 61914235 Florent Chuffart
    # # Read sample file
1711 55c1cdff Florent Chuffart
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
1712 61914235 Florent Chuffart
    # # here are the sample ids in a list
1713 61914235 Florent Chuffart
    # expes = list(c(1))
1714 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
1715 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
1716 61914235 Florent Chuffart
    # # it displays the corverage
1717 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
1718 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
1719 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
1720 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
1721 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
1722 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
1723 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
1724 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
1725 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
1726 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
1727 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
1728 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
1729 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
1730 61914235 Florent Chuffart
    #       height = 50)
1731 61914235 Florent Chuffart
  })
1732 61914235 Florent Chuffart
1733 61914235 Florent Chuffart
1734 61914235 Florent Chuffart
1735 61914235 Florent Chuffart
1736 61914235 Florent Chuffart
1737 61914235 Florent Chuffart
1738 61914235 Florent Chuffart
1739 61914235 Florent Chuffart
1740 61914235 Florent Chuffart
1741 61914235 Florent Chuffart
1742 61914235 Florent Chuffart
1743 61914235 Florent Chuffart
1744 61914235 Florent Chuffart
1745 61914235 Florent Chuffart
1746 61914235 Florent Chuffart
1747 61914235 Florent Chuffart
1748 61914235 Florent Chuffart
1749 61914235 Florent Chuffart
1750 61914235 Florent Chuffart
1751 61914235 Florent Chuffart
1752 61914235 Florent Chuffart
1753 61914235 Florent Chuffart
1754 61914235 Florent Chuffart
1755 61914235 Florent Chuffart
1756 61914235 Florent Chuffart
1757 61914235 Florent Chuffart
1758 61914235 Florent Chuffart
1759 61914235 Florent Chuffart
1760 61914235 Florent Chuffart
1761 61914235 Florent Chuffart
1762 61914235 Florent Chuffart
1763 61914235 Florent Chuffart
1764 61914235 Florent Chuffart
1765 1d833b97 Florent Chuffart
1766 1d833b97 Florent Chuffart
1767 1d833b97 Florent Chuffart
1768 1d833b97 Florent Chuffart
1769 1d833b97 Florent Chuffart
1770 1d833b97 Florent Chuffart
1771 1d833b97 Florent Chuffart
1772 1d833b97 Florent Chuffart
1773 1d833b97 Florent Chuffart
1774 1d833b97 Florent Chuffart
1775 1d833b97 Florent Chuffart
1776 1d833b97 Florent Chuffart
1777 1d833b97 Florent Chuffart
1778 1d833b97 Florent Chuffart
1779 1d833b97 Florent Chuffart
1780 1d833b97 Florent Chuffart
1781 1d833b97 Florent Chuffart
1782 1d833b97 Florent Chuffart
1783 1d833b97 Florent Chuffart
1784 1d833b97 Florent Chuffart
1785 1d833b97 Florent Chuffart
1786 1d833b97 Florent Chuffart
1787 1d833b97 Florent Chuffart
1788 1d833b97 Florent Chuffart
1789 1d833b97 Florent Chuffart
1790 1d833b97 Florent Chuffart
1791 1d833b97 Florent Chuffart
1792 1d833b97 Florent Chuffart
1793 1d833b97 Florent Chuffart
1794 1d833b97 Florent Chuffart
1795 1d833b97 Florent Chuffart
1796 1d833b97 Florent Chuffart
1797 1d833b97 Florent Chuffart
1798 1d833b97 Florent Chuffart
1799 6b5a528c Florent Chuffart
1800 6b5a528c Florent Chuffart
1801 6b5a528c Florent Chuffart
1802 6b5a528c Florent Chuffart
1803 6b5a528c Florent Chuffart
1804 6b5a528c Florent Chuffart
1805 6b5a528c Florent Chuffart
1806 6b5a528c Florent Chuffart
1807 6b5a528c Florent Chuffart
1808 6b5a528c Florent Chuffart
1809 6b5a528c Florent Chuffart
1810 6b5a528c Florent Chuffart
1811 6b5a528c Florent Chuffart
1812 6b5a528c Florent Chuffart
1813 6b5a528c Florent Chuffart
1814 6b5a528c Florent Chuffart
1815 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
1816 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
1817 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
1818 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
1819 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
1820 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
1821 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
1822 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
1823 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
1824 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
1825 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
1826 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
1827 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
1828 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
1829 9fdbfada Florent Chuffart
plot_fuzzy_nucs = TRUE,  ##<< Plot (or not) cluster of fuzzy
1830 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
1831 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
1832 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
1833 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1834 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
1835 6e0010bc Florent Chuffart
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
1836 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
1837 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1838 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1839 6e0010bc Florent Chuffart
main=NULL, ##<< main title of the produced plot
1840 6e0010bc Florent Chuffart
xlab=NULL, ##<< xlab of the produced plot
1841 6e0010bc Florent Chuffart
ylab="#reads (per million reads)", ##<< ylab of the produced plot
1842 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1843 6b5a528c Florent Chuffart
){
1844 6b5a528c Florent Chuffart
  returned_list = list()
1845 6b5a528c Florent Chuffart
  # Computing global display parameters
1846 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
1847 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
1848 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
1849 6b5a528c Florent Chuffart
  } else {
1850 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
1851 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
1852 6b5a528c Florent Chuffart
  }
1853 6b5a528c Florent Chuffart
	base_glo = 0
1854 6b5a528c Florent Chuffart
	nb_rank_glo = 0
1855 6b5a528c Florent Chuffart
  for (samples in replicates) {
1856 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
1857 6b5a528c Florent Chuffart
  }
1858 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
1859 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
1860 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
1861 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
1862 6b5a528c Florent Chuffart
  # Plot main frame
1863 6e0010bc Florent Chuffart
  if (is.null(xlab)) {
1864 6e0010bc Florent Chuffart
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
1865 6e0010bc Florent Chuffart
  }
1866 6e0010bc Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
1867 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
1868 6b5a528c Florent Chuffart
  # Go
1869 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
1870 9fdbfada Florent Chuffart
  wp_maps = list()
1871 9fdbfada Florent Chuffart
  fuzzy_maps = list()
1872 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
1873 6b5a528c Florent Chuffart
		# Computing replicate parameters
1874 6b5a528c Florent Chuffart
		nb_rank = length(samples)
1875 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
1876 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
1877 6b5a528c Florent Chuffart
		y_min = min(ylim)
1878 6b5a528c Florent Chuffart
		y_max = max(ylim)
1879 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
1880 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
1881 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
1882 6b5a528c Florent Chuffart
			# computing sample parameters
1883 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
1884 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
1885 6e0010bc Florent Chuffart
      if (plot_sample_id) {
1886 6e0010bc Florent Chuffart
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
1887 6e0010bc Florent Chuffart
      }
1888 6b5a528c Florent Chuffart
1889 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
1890 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
1891 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
1892 6b5a528c Florent Chuffart
		  } else {
1893 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
1894 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
1895 5bfac5a3 Florent Chuffart
		  }
1896 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
1897 6b5a528c Florent Chuffart
	    # Plot Genome seq
1898 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
1899 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
1900 6b5a528c Florent Chuffart
		  }
1901 6b5a528c Florent Chuffart
			# Plot reads
1902 6b5a528c Florent Chuffart
			reads = sample$inputs
1903 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
1904 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
1905 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
1906 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
1907 6b5a528c Florent Chuffart
			}
1908 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
1909 6b5a528c Florent Chuffart
        # require(plotrix)
1910 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
1911 6b5a528c Florent Chuffart
			}
1912 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
1913 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
1914 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
1915 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
1916 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
1917 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
1918 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
1919 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
1920 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
1921 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
1922 a52f85d7 Florent Chuffart
          }
1923 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
1924 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
1925 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
1926 a52f85d7 Florent Chuffart
          tmp_current_level = 0
1927 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
1928 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
1929 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
1930 a52f85d7 Florent Chuffart
          }
1931 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
1932 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
1933 a52f85d7 Florent Chuffart
        } else {
1934 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
1935 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
1936 6b5a528c Florent Chuffart
        }
1937 0d1475ac Florent Chuffart
        # print(steps_x)
1938 0d1475ac Florent Chuffart
        # print(steps_y)
1939 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
1940 6b5a528c Florent Chuffart
        abline(y_lev,0)
1941 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
1942 6b5a528c Florent Chuffart
			}
1943 6b5a528c Florent Chuffart
			# Plot nucs
1944 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
1945 6b5a528c Florent Chuffart
				nucs = sample$outputs
1946 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
1947 6b5a528c Florent Chuffart
					col = 1
1948 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
1949 6e0010bc Florent Chuffart
            foo<<-nucs
1950 7646593d Florent Chuffart
            if (change_col) {
1951 55c1cdff Florent Chuffart
  						col = col + 1
1952 6e0010bc Florent Chuffart
              } else {
1953 6e0010bc Florent Chuffart
                col = "blue"
1954 6e0010bc Florent Chuffart
              }
1955 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
1956 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
1957 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
1958 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
1959 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
1960 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
1961 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
1962 6b5a528c Florent Chuffart
						}
1963 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
1964 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
1965 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1966 6b5a528c Florent Chuffart
		  			}
1967 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
1968 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
1969 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
1970 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
1971 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
1972 6b5a528c Florent Chuffart
	          }
1973 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
1974 6e0010bc Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
1975 6b5a528c Florent Chuffart
	          }
1976 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
1977 6b5a528c Florent Chuffart
				      # require(plotrix)
1978 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
1979 6b5a528c Florent Chuffart
						}
1980 6b5a528c Florent Chuffart
		      }
1981 6b5a528c Florent Chuffart
		    } else {
1982 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
1983 6b5a528c Florent Chuffart
		    }
1984 6b5a528c Florent Chuffart
			}
1985 6b5a528c Florent Chuffart
	  }
1986 55c1cdff Florent Chuffart
1987 6b5a528c Florent Chuffart
	  # Plot wp nucs
1988 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
1989 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
1990 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
1991 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
1992 6b5a528c Florent Chuffart
				} else {
1993 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
1994 6b5a528c Florent Chuffart
				}
1995 6b5a528c Florent Chuffart
		  } else {
1996 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
1997 6b5a528c Florent Chuffart
		  }
1998 729c934e Florent Chuffart
      if (plot_chain) {
1999 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
2000 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2001 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
2002 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
2003 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
2004 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
2005 729c934e Florent Chuffart
            # print(tmp_nuc)
2006 729c934e Florent Chuffart
            return(tmp_nuc)
2007 55c1cdff Florent Chuffart
          })
2008 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
2009 dc58ee6c Florent Chuffart
        })
2010 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2011 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2012 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2013 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
2014 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
2015 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2016 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
2017 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
2018 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
2019 6e0010bc Florent Chuffart
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2020 6e0010bc Florent Chuffart
        if (is.null(config$TRACK_LOD_OFFSET)) {
2021 6e0010bc Florent Chuffart
          config$TRACK_LOD_OFFSET = 0
2022 6e0010bc Florent Chuffart
        }
2023 6e0010bc Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
2024 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
2025 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
2026 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2027 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2028 7e2d37e1 Florent Chuffart
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
2029 55c1cdff Florent Chuffart
        new_tmp_x = c()
2030 55c1cdff Florent Chuffart
        new_tmp_y = c()
2031 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2032 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
2033 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
2034 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2035 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2036 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2037 ec2936ea Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lwd=2)
2038 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2039 ec2936ea Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lwd=2)
2040 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2041 6e0010bc Florent Chuffart
        if (is.null(config$LEGEND_LOD_POS)) {
2042 6e0010bc Florent Chuffart
          pos = 2
2043 6e0010bc Florent Chuffart
        } else {
2044 6e0010bc Florent Chuffart
          pos = config$LEGEND_LOD_POS
2045 6e0010bc Florent Chuffart
        }
2046 7e2d37e1 Florent Chuffart
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2047 7e2d37e1 Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
2048 37b11e9b Florent Chuffart
        
2049 729c934e Florent Chuffart
      }
2050 55c1cdff Florent Chuffart
2051 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2052 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2053 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2054 9fdbfada Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
2055 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2056 9fdbfada Florent Chuffart
2057 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2058 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2059 4016229d Florent Chuffart
          if (!is.null(fuzzy_map)) {
2060 4016229d Florent Chuffart
            if (nrow(fuzzy_map) > 0) {
2061 4016229d Florent Chuffart
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2062 4016229d Florent Chuffart
            }
2063 d492bca4 Florent Chuffart
          }
2064 9fdbfada Florent Chuffart
        } 
2065 9fdbfada Florent Chuffart
2066 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2067 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2068 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2069 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2070 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2071 9fdbfada Florent Chuffart
      					all_original_reads = c()
2072 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2073 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2074 9fdbfada Florent Chuffart
      					}
2075 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2076 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2077 9fdbfada Florent Chuffart
    				  }
2078 729c934e Florent Chuffart
  				  }
2079 729c934e Florent Chuffart
  				}
2080 729c934e Florent Chuffart
  			}
2081 729c934e Florent Chuffart
      }
2082 6b5a528c Florent Chuffart
		}
2083 5bfac5a3 Florent Chuffart
	}
2084 55c1cdff Florent Chuffart
2085 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2086 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2087 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2088 b69443b5 Florent Chuffart
      if (!is.null(aligned_inter_strain_nucs)) {
2089 4016229d Florent Chuffart
        aligned_inter_strain_nucs$cur_index = "foo" 
2090 b69443b5 Florent Chuffart
      }
2091 9fdbfada Florent Chuffart
    }
2092 55c1cdff Florent Chuffart
2093 9fdbfada Florent Chuffart
    #Plot common wp nucs
2094 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2095 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2096 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2097 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2098 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2099 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2100 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2101 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2102 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2103 9fdbfada Florent Chuffart
2104 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2105 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2106 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2107 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2108 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2109 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2110 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2111 9fdbfada Florent Chuffart
        } else {
2112 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2113 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2114 6b5a528c Florent Chuffart
        }
2115 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2116 55c1cdff Florent Chuffart
      }
2117 9fdbfada Florent Chuffart
    }
2118 b69443b5 Florent Chuffart
    print(aligned_inter_strain_nucs)
2119 b69443b5 Florent Chuffart
    if (!is.null(aligned_inter_strain_nucs)) {
2120 b69443b5 Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2121 b69443b5 Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2122 b69443b5 Florent Chuffart
        tmp_xs = tmp_ys = c()
2123 b69443b5 Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2124 b69443b5 Florent Chuffart
          samples = replicates[[replicate_rank]]
2125 b69443b5 Florent Chuffart
          strain = samples[[1]]$strain
2126 b69443b5 Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2127 b69443b5 Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2128 b69443b5 Florent Chuffart
        }
2129 37b11e9b Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2130 55c1cdff Florent Chuffart
      }
2131 9fdbfada Florent Chuffart
    }
2132 9fdbfada Florent Chuffart
    
2133 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2134 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2135 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2136 4016229d Florent Chuffart
      cur_index = "foo"
2137 9fdbfada Florent Chuffart
      common_nuc_results = list()
2138 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2139 4016229d Florent Chuffart
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2140 ec2936ea Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))        
2141 9fdbfada Florent Chuffart
    }
2142 9fdbfada Florent Chuffart
2143 6b5a528c Florent Chuffart
	}
2144 6b5a528c Florent Chuffart
  return(returned_list)
2145 6b5a528c Florent Chuffart
}
2146 6b5a528c Florent Chuffart
2147 6b5a528c Florent Chuffart
2148 6b5a528c Florent Chuffart
2149 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2150 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2151 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2152 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2153 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2154 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2155 9fdbfada Florent Chuffart
) {
2156 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2157 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2158 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2159 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2160 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2161 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2162 9fdbfada Florent Chuffart
    }
2163 9fdbfada Florent Chuffart
  }
2164 9fdbfada Florent Chuffart
  return(fuzzy_map)
2165 9fdbfada Florent Chuffart
}
2166 9fdbfada Florent Chuffart
2167 9fdbfada Florent Chuffart
2168 9fdbfada Florent Chuffart
2169 9fdbfada Florent Chuffart
2170 9fdbfada Florent Chuffart
2171 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2172 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2173 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2174 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2175 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
2176 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2177 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2178 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2179 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2180 9fdbfada Florent Chuffart
) {
2181 4016229d Florent Chuffart
  # print(cur_index)
2182 9fdbfada Florent Chuffart
2183 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2184 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2185 4016229d Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2186 9fdbfada Florent Chuffart
2187 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2188 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2189 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2190 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2191 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2192 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2193 9fdbfada Florent Chuffart
  # Let's go!
2194 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2195 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2196 9fdbfada Florent Chuffart
      return(NULL)
2197 9fdbfada Florent Chuffart
    } else {
2198 9fdbfada Florent Chuffart
      return (index_nuc)
2199 9fdbfada Florent Chuffart
    }
2200 9fdbfada Florent Chuffart
  }))
2201 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2202 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2203 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2204 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2205 9fdbfada Florent Chuffart
  }
2206 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2207 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2208 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2209 9fdbfada Florent Chuffart
2210 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2211 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2212 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2213 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2214 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2215 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2216 9fdbfada Florent Chuffart
  # Let's go!
2217 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2218 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2219 9fdbfada Florent Chuffart
      return(NULL)
2220 9fdbfada Florent Chuffart
    } else {
2221 9fdbfada Florent Chuffart
      return (index_nuc)
2222 9fdbfada Florent Chuffart
    }
2223 9fdbfada Florent Chuffart
  }))
2224 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2225 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2226 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2227 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2228 9fdbfada Florent Chuffart
  }
2229 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2230 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2231 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2232 4016229d Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2233 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2234 9fdbfada Florent Chuffart
2235 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2236 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2237 9fdbfada Florent Chuffart
2238 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2239 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2240 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2241 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2242 9fdbfada Florent Chuffart
  # Let's go!
2243 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2244 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2245 9fdbfada Florent Chuffart
      return (index_nuc)
2246 9fdbfada Florent Chuffart
    } else {
2247 9fdbfada Florent Chuffart
      return(NULL)
2248 9fdbfada Florent Chuffart
    }
2249 9fdbfada Florent Chuffart
  }))
2250 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2251 9fdbfada Florent Chuffart
  
2252 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2253 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2254 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2255 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2256 9fdbfada Florent Chuffart
  # Let's go!
2257 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2258 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2259 9fdbfada Florent Chuffart
      return (index_nuc)
2260 9fdbfada Florent Chuffart
    } else {
2261 9fdbfada Florent Chuffart
      return(NULL)
2262 9fdbfada Florent Chuffart
    }
2263 9fdbfada Florent Chuffart
  }))
2264 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2265 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2266 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2267 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2268 9fdbfada Florent Chuffart
  } else {
2269 4016229d Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2270 9fdbfada Florent Chuffart
  }
2271 9fdbfada Florent Chuffart
2272 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2273 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2274 9fdbfada Florent Chuffart
2275 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2276 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2277 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2278 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2279 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2280 37b11e9b Florent Chuffart
  min_unr_width = 75
2281 37b11e9b Florent Chuffart
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2282 9fdbfada Florent Chuffart
2283 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2284 9fdbfada Florent Chuffart
  return (non_inter_unr)
2285 9fdbfada Florent Chuffart
}
2286 9fdbfada Florent Chuffart
2287 9fdbfada Florent Chuffart