root / src / R / nucleominer.R @ cc54d805
Historique | Voir | Annoter | Télécharger (87,29 ko)
1 | 6b5a528c | Florent Chuffart | FDR = structure(function# False Discovery Rate |
---|---|---|---|
2 | 6b5a528c | Florent Chuffart | ### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper |
3 | 55c1cdff | Florent Chuffart | ##author<< Gael Yvert, |
4 | 6b5a528c | Florent Chuffart | ( |
5 | 6b5a528c | Florent Chuffart | x, ##<< A vector x of independent p-values. |
6 | 6b5a528c | Florent Chuffart | FDR ##<< The specified FDR. |
7 | 6b5a528c | Florent Chuffart | ) { |
8 | 6b5a528c | Florent Chuffart | x <- sort(na.omit(x)) |
9 | 6b5a528c | Florent Chuffart | N = length(x) |
10 | 6b5a528c | Florent Chuffart | i = 1; |
11 | 6b5a528c | Florent Chuffart | while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR |
12 | 6b5a528c | Florent Chuffart | if (i == 1) |
13 | 6b5a528c | Florent Chuffart | return (NA) |
14 | 6b5a528c | Florent Chuffart | else |
15 | 6b5a528c | Florent Chuffart | return( x[i-1] ) |
16 | 6b5a528c | Florent Chuffart | ### Return the the corresponding cutoff. |
17 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
18 | 6b5a528c | Florent Chuffart | print("example") |
19 | 6b5a528c | Florent Chuffart | }) |
20 | 6b5a528c | Florent Chuffart | |
21 | 6b5a528c | Florent Chuffart | |
22 | 6b5a528c | Florent Chuffart | |
23 | 7e2d37e1 | Florent Chuffart | llr_score_nvecs = structure(function # Likelihood ratio |
24 | 7e2d37e1 | Florent Chuffart | ### Compute the log likelihood ratio of two or more set of value. |
25 | 6b5a528c | Florent Chuffart | ( |
26 | 7e2d37e1 | Florent Chuffart | xs ##<< list of vectors. |
27 | 6b5a528c | Florent Chuffart | ) { |
28 | 7e2d37e1 | Florent Chuffart | l = length(xs) |
29 | 7e2d37e1 | Florent Chuffart | if (l < 1) { |
30 | 7e2d37e1 | Florent Chuffart | return(NA) |
31 | 7e2d37e1 | Florent Chuffart | } |
32 | 7e2d37e1 | Florent Chuffart | if (l == 1) { |
33 | 7e2d37e1 | Florent Chuffart | return(1) |
34 | 7e2d37e1 | Florent Chuffart | } |
35 | 7e2d37e1 | Florent Chuffart | sumllX = 0 |
36 | 7e2d37e1 | Florent Chuffart | for (i in 1:l) { |
37 | 7e2d37e1 | Florent Chuffart | x = xs[[i]] |
38 | 7e2d37e1 | Florent Chuffart | if (length(x) <= 1) { |
39 | 7e2d37e1 | Florent Chuffart | return(NA) |
40 | 7e2d37e1 | Florent Chuffart | } |
41 | 7e2d37e1 | Florent Chuffart | meanX = mean(x) |
42 | 7e2d37e1 | Florent Chuffart | sdX = sd(x) |
43 | 7e2d37e1 | Florent Chuffart | llX = sum(log(dnorm(x,mean=meanX,sd=sdX))) |
44 | 7e2d37e1 | Florent Chuffart | sumllX = sumllX + llX |
45 | 7e2d37e1 | Florent Chuffart | } |
46 | 7e2d37e1 | Florent Chuffart | meanXglo = mean(unlist(xs)) |
47 | 7e2d37e1 | Florent Chuffart | sdXglo = sd(unlist(xs)) |
48 | 7e2d37e1 | Florent Chuffart | llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo))) |
49 | 7e2d37e1 | Florent Chuffart | ratio = sumllX - llXYZ |
50 | 6b5a528c | Florent Chuffart | return(ratio) |
51 | 7e2d37e1 | Florent Chuffart | ### Returns the log likelihood ratio. |
52 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
53 | e5603c3f | Florent Chuffart | # LLR score for 2 set of values |
54 | 6b5a528c | Florent Chuffart | mean1=5; sd1=2; card2 = 250 |
55 | 6b5a528c | Florent Chuffart | mean2=6; sd2=3; card1 = 200 |
56 | 6b5a528c | Florent Chuffart | x1 = rnorm(card1, mean1, sd1) |
57 | 55c1cdff | Florent Chuffart | x2 = rnorm(card2, mean2, sd2) |
58 | 6b5a528c | Florent Chuffart | min = floor(min(c(x1,x2))) |
59 | 6b5a528c | Florent Chuffart | max = ceiling(max(c(x1,x2))) |
60 | 6b5a528c | Florent Chuffart | hist(c(x1,x2), xlim=c(min, max), breaks=min:max) |
61 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2) |
62 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3) |
63 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4) |
64 | 7e2d37e1 | Florent Chuffart | llr_score_nvecs(list(x1,x2)) |
65 | 6b5a528c | Florent Chuffart | }) |
66 | 6b5a528c | Florent Chuffart | |
67 | 6b5a528c | Florent Chuffart | dfadd = structure(function# Adding list to a dataframe. |
68 | 6b5a528c | Florent Chuffart | ### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}. |
69 | 6b5a528c | Florent Chuffart | (df, ##<< A dataframe |
70 | 6b5a528c | Florent Chuffart | l ##<< A list |
71 | 6b5a528c | Florent Chuffart | ) { |
72 | 6b5a528c | Florent Chuffart | if (is.null(df)) { |
73 | 6b5a528c | Florent Chuffart | df = data.frame(l,stringsAsFactors=FALSE) |
74 | 6b5a528c | Florent Chuffart | } else { |
75 | 6b5a528c | Florent Chuffart | df = rbind(df, data.frame(l,stringsAsFactors=FALSE)) |
76 | 6b5a528c | Florent Chuffart | } |
77 | 6b5a528c | Florent Chuffart | return(df) |
78 | 6b5a528c | Florent Chuffart | ### Return the dataframe \emph{df}. |
79 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
80 | 6b5a528c | Florent Chuffart | ## Here dataframe is NULL |
81 | 6b5a528c | Florent Chuffart | print(df) |
82 | 6b5a528c | Florent Chuffart | df = NULL |
83 | 5bfac5a3 | Florent Chuffart | |
84 | 6b5a528c | Florent Chuffart | # Initialize df |
85 | 6b5a528c | Florent Chuffart | df = dfadd(df, list(key1 = "value1", key2 = "value2")) |
86 | 6b5a528c | Florent Chuffart | print(df) |
87 | 5bfac5a3 | Florent Chuffart | |
88 | 6b5a528c | Florent Chuffart | # Adding elements to df |
89 | 6b5a528c | Florent Chuffart | df = dfadd(df, list(key1 = "value1'", key2 = "value2'")) |
90 | 6b5a528c | Florent Chuffart | print(df) |
91 | 6b5a528c | Florent Chuffart | }) |
92 | 6b5a528c | Florent Chuffart | |
93 | 6b5a528c | Florent Chuffart | |
94 | 6b5a528c | Florent Chuffart | sign_from_strand = function( |
95 | 55c1cdff | Florent Chuffart | ### Get the sign of strand |
96 | 6b5a528c | Florent Chuffart | strands) { |
97 | 6b5a528c | Florent Chuffart | apply(t(strands), 2, function(strand) { if (strand == "F") return(1) else return(-1)}) |
98 | 6b5a528c | Florent Chuffart | ### If strand in forward then returns 1 else returns -1 |
99 | 6b5a528c | Florent Chuffart | } |
100 | 6b5a528c | Florent Chuffart | |
101 | 6b5a528c | Florent Chuffart | flat_reads = function( |
102 | 6b5a528c | Florent Chuffart | ### Extract reads coordinates from TempleteFilter input sequence |
103 | 55c1cdff | Florent Chuffart | reads, ##<< TemplateFilter input reads |
104 | 6b5a528c | Florent Chuffart | nuc_width ##<< Width used to shift F and R reads. |
105 | 6b5a528c | Florent Chuffart | ) { |
106 | 6b5a528c | Florent Chuffart | F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])})) |
107 | 6b5a528c | Florent Chuffart | R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])})) |
108 | 6b5a528c | Florent Chuffart | flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads)) ) |
109 | 6b5a528c | Florent Chuffart | return(list(F_flatted_reads, R_flatted_reads, flatted_reads)) |
110 | 6b5a528c | Florent Chuffart | ### Returns a list of F reads, R reads and joint/shifted F and R reads. |
111 | 6b5a528c | Florent Chuffart | } |
112 | 6b5a528c | Florent Chuffart | |
113 | 6b5a528c | Florent Chuffart | |
114 | 6b5a528c | Florent Chuffart | |
115 | 6b5a528c | Florent Chuffart | filter_tf_outputs = function(# Filter TemplateFilter outputs |
116 | ec2936ea | Florent Chuffart | ### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold. |
117 | 6b5a528c | Florent Chuffart | tf_outputs, ##<< TemplateFilter outputs. |
118 | 6b5a528c | Florent Chuffart | chr, ##<< Chromosome observed, here chr is an integer. |
119 | 6b5a528c | Florent Chuffart | x_min, ##<< Coordinate of the first bp observed. |
120 | 6b5a528c | Florent Chuffart | x_max, ##<< Coordinate of the last bp observed. |
121 | 6b5a528c | Florent Chuffart | nuc_width = 160, ##<< Nucleosome width. |
122 | 6b5a528c | Florent Chuffart | ol_bp = 59, ##<< Overlap Threshold. |
123 | 6b5a528c | Florent Chuffart | corr_thres = 0.5 ##<< Correlation threshold. |
124 | 6b5a528c | Florent Chuffart | ) { |
125 | 6b5a528c | Florent Chuffart | if (x_min < 0) { |
126 | 0d5fbf02 | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <= -x_min,] |
127 | 6b5a528c | Florent Chuffart | } else { |
128 | 0d5fbf02 | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,] |
129 | 6b5a528c | Florent Chuffart | } |
130 | 6b5a528c | Florent Chuffart | tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2 |
131 | 6b5a528c | Florent Chuffart | tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2 |
132 | 7e2d37e1 | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,] |
133 | 7e2d37e1 | Florent Chuffart | tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),] |
134 | 6b5a528c | Florent Chuffart | i = 1 |
135 | 55c1cdff | Florent Chuffart | while (i <= length(tf_outputs[,1])) { |
136 | 7e2d37e1 | Florent Chuffart | lb = tf_outputs[i,]$lower_bound |
137 | 7e2d37e1 | Florent Chuffart | ub = tf_outputs[i,]$upper_bound |
138 | 7e2d37e1 | Florent Chuffart | tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),] |
139 | 6b5a528c | Florent Chuffart | i = i+1 |
140 | 6b5a528c | Florent Chuffart | } |
141 | 6b5a528c | Florent Chuffart | return(tf_outputs) |
142 | 6b5a528c | Florent Chuffart | ### Returns filtered TemplateFilter Outputs |
143 | 6b5a528c | Florent Chuffart | } |
144 | 6b5a528c | Florent Chuffart | |
145 | 6b5a528c | Florent Chuffart | |
146 | 6b5a528c | Florent Chuffart | |
147 | 6b5a528c | Florent Chuffart | filter_tf_inputs = function(# Filter TemplateFilter inputs |
148 | 6b5a528c | Florent Chuffart | ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads. |
149 | 6b5a528c | Florent Chuffart | inputs, ##<< TF inputs to be filtered. |
150 | 6b5a528c | Florent Chuffart | chr, ##<< Chromosome observed, here chr is an integer. |
151 | 6b5a528c | Florent Chuffart | x_min, ##<< Coordinate of the first bp observed. |
152 | 6b5a528c | Florent Chuffart | x_max, ##<< Coordinate of the last bp observed. |
153 | 6b5a528c | Florent Chuffart | nuc_width = 160, ##<< Nucleosome width. |
154 | 6b5a528c | Florent Chuffart | only_f = FALSE, ##<< Filter only F reads. |
155 | b8a95426 | Florent Chuffart | only_r = FALSE, ##<< Filter only R reads. |
156 | 3761ede2 | Florent Chuffart | filter_for_coverage = FALSE ##<< Does it filter for plot coverage? |
157 | 6b5a528c | Florent Chuffart | ) { |
158 | 3761ede2 | Florent Chuffart | if (only_f) { |
159 | 3761ede2 | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,] |
160 | 3761ede2 | Florent Chuffart | } else if (only_r) { |
161 | 3761ede2 | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,] |
162 | 3761ede2 | Florent Chuffart | } else { |
163 | 3761ede2 | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,] |
164 | 3761ede2 | Florent Chuffart | } |
165 | b8a95426 | Florent Chuffart | if (!filter_for_coverage) { |
166 | 3761ede2 | Florent Chuffart | corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3]) |
167 | 3761ede2 | Florent Chuffart | inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,] |
168 | b8a95426 | Florent Chuffart | } |
169 | 3761ede2 | Florent Chuffart | return(inputs) |
170 | 6b5a528c | Florent Chuffart | ### Returns filtred inputs. |
171 | 6b5a528c | Florent Chuffart | } |
172 | 6b5a528c | Florent Chuffart | |
173 | 6b5a528c | Florent Chuffart | |
174 | 6b5a528c | Florent Chuffart | |
175 | 3761ede2 | Florent Chuffart | # filter_tf_inputs = function(# Filter TemplateFilter inputs |
176 | 3761ede2 | Florent Chuffart | # ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads. |
177 | 3761ede2 | Florent Chuffart | # inputs, ##<< TF inputs to be filtered. |
178 | 3761ede2 | Florent Chuffart | # chr, ##<< Chromosome observed, here chr is an integer. |
179 | 3761ede2 | Florent Chuffart | # x_min, ##<< Coordinate of the first bp observed. |
180 | 3761ede2 | Florent Chuffart | # x_max, ##<< Coordinate of the last bp observed. |
181 | 3761ede2 | Florent Chuffart | # nuc_width = 160, ##<< Nucleosome width. |
182 | 3761ede2 | Florent Chuffart | # only_f = FALSE, ##<< Filter only F reads. |
183 | 3761ede2 | Florent Chuffart | # only_r = FALSE, ##<< Filter only R reads. |
184 | 3761ede2 | Florent Chuffart | # filter_for_coverage = FALSE, ##<< Does it filter for plot coverage? |
185 | 3761ede2 | Florent Chuffart | # USE_DPLYR = FALSE ##<< Use dplyr lib to filter reads. |
186 | 3761ede2 | Florent Chuffart | # ) { |
187 | 3761ede2 | Florent Chuffart | # n = names(inputs) |
188 | 3761ede2 | Florent Chuffart | # |
189 | 3761ede2 | Florent Chuffart | # if (!USE_DPLYR) { |
190 | 3761ede2 | Florent Chuffart | # if (only_f) { |
191 | 3761ede2 | Florent Chuffart | # inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,] |
192 | 3761ede2 | Florent Chuffart | # } else if (only_r) { |
193 | 3761ede2 | Florent Chuffart | # inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,] |
194 | 3761ede2 | Florent Chuffart | # } else { |
195 | 3761ede2 | Florent Chuffart | # inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,] |
196 | 3761ede2 | Florent Chuffart | # } |
197 | 3761ede2 | Florent Chuffart | # } else { |
198 | 3761ede2 | Florent Chuffart | # names(inputs) = c("chr", "pos", "str", "lev") |
199 | 3761ede2 | Florent Chuffart | # if (only_f) { |
200 | 3761ede2 | Florent Chuffart | # inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width) |
201 | 3761ede2 | Florent Chuffart | # } else if (only_r) { |
202 | 3761ede2 | Florent Chuffart | # inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width) |
203 | 3761ede2 | Florent Chuffart | # } else { |
204 | 3761ede2 | Florent Chuffart | # inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width) |
205 | 3761ede2 | Florent Chuffart | # } |
206 | 3761ede2 | Florent Chuffart | # # if (!filter_for_coverage) { |
207 | 3761ede2 | Florent Chuffart | # # inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3]) |
208 | 3761ede2 | Florent Chuffart | # # inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max) |
209 | 3761ede2 | Florent Chuffart | # # inputs$corrected_inputs_coords = NULL |
210 | 3761ede2 | Florent Chuffart | # # } |
211 | 3761ede2 | Florent Chuffart | # } |
212 | 3761ede2 | Florent Chuffart | # |
213 | 3761ede2 | Florent Chuffart | # if (!filter_for_coverage) { |
214 | 3761ede2 | Florent Chuffart | # corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3]) |
215 | 3761ede2 | Florent Chuffart | # inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,] |
216 | 3761ede2 | Florent Chuffart | # } |
217 | 3761ede2 | Florent Chuffart | # |
218 | 3761ede2 | Florent Chuffart | # names(inputs_out) = n |
219 | 3761ede2 | Florent Chuffart | # return(inputs_out) |
220 | 3761ede2 | Florent Chuffart | # ### Returns filtred inputs. |
221 | 3761ede2 | Florent Chuffart | # } |
222 | 3761ede2 | Florent Chuffart | |
223 | 3761ede2 | Florent Chuffart | |
224 | 3761ede2 | Florent Chuffart | |
225 | 6b5a528c | Florent Chuffart | get_comp_strand = function( |
226 | 6b5a528c | Florent Chuffart | ### Compute the complementatry strand. |
227 | 6b5a528c | Florent Chuffart | strand ##<< The original strand. |
228 | 6b5a528c | Florent Chuffart | ) { |
229 | 6b5a528c | Florent Chuffart | apply(t(strand),2, function(n){ |
230 | 55c1cdff | Florent Chuffart | if (n=="a") {return("t")} |
231 | 6b5a528c | Florent Chuffart | if (n=="t") {return("a")} |
232 | 55c1cdff | Florent Chuffart | if (n=="c") {return("g")} |
233 | 55c1cdff | Florent Chuffart | if (n=="g") {return("c")} |
234 | 6b5a528c | Florent Chuffart | }) |
235 | 6b5a528c | Florent Chuffart | ### Returns the complementatry strand. |
236 | 6b5a528c | Florent Chuffart | } |
237 | 6b5a528c | Florent Chuffart | |
238 | 6b5a528c | Florent Chuffart | |
239 | 55c1cdff | Florent Chuffart | aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. |
240 | 5badc2fd | Florent Chuffart | ### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample. |
241 | 6b5a528c | Florent Chuffart | samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}. |
242 | e5603c3f | Florent Chuffart | llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating |
243 | 6b5a528c | Florent Chuffart | coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound. |
244 | 6b5a528c | Florent Chuffart | ){ |
245 | 6b5a528c | Florent Chuffart | end_of_tracks = function(tracks) { |
246 | 6b5a528c | Florent Chuffart | if (length(tracks) == 0) { |
247 | 6b5a528c | Florent Chuffart | return(TRUE) |
248 | 6b5a528c | Florent Chuffart | } |
249 | 6b5a528c | Florent Chuffart | for (lower_bound in tracks) { |
250 | 6b5a528c | Florent Chuffart | if(!is.na(lower_bound)) { |
251 | 6b5a528c | Florent Chuffart | if (lower_bound < coord_max) { |
252 | 6b5a528c | Florent Chuffart | return(FALSE) |
253 | 6b5a528c | Florent Chuffart | } |
254 | 6b5a528c | Florent Chuffart | } |
255 | 6b5a528c | Florent Chuffart | } |
256 | 6b5a528c | Florent Chuffart | return(TRUE) |
257 | 6b5a528c | Florent Chuffart | } |
258 | d973538c | Florent Chuffart | store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) { |
259 | 6b5a528c | Florent Chuffart | if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) { |
260 | 6b5a528c | Florent Chuffart | new_cluster$wp = TRUE |
261 | 6b5a528c | Florent Chuffart | center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2 |
262 | 6b5a528c | Florent Chuffart | if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) { |
263 | 6b5a528c | Florent Chuffart | clusters[[length(clusters) + 1]] = new_cluster |
264 | 6b5a528c | Florent Chuffart | # print(new_cluster) |
265 | 6b5a528c | Florent Chuffart | } |
266 | 6b5a528c | Florent Chuffart | } else { |
267 | 6b5a528c | Florent Chuffart | new_cluster$wp = FALSE |
268 | 6b5a528c | Florent Chuffart | center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2 |
269 | 6b5a528c | Florent Chuffart | if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) { |
270 | 6b5a528c | Florent Chuffart | clusters[[length(clusters) + 1]] = new_cluster |
271 | 6b5a528c | Florent Chuffart | } |
272 | 6b5a528c | Florent Chuffart | } |
273 | 6b5a528c | Florent Chuffart | return(clusters) |
274 | 6b5a528c | Florent Chuffart | } |
275 | 6b5a528c | Florent Chuffart | strain = samples[[1]]$strain |
276 | 7e2d37e1 | Florent Chuffart | llr_scores = c() |
277 | 6b5a528c | Florent Chuffart | min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end) |
278 | 6b5a528c | Florent Chuffart | max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end) |
279 | 6b5a528c | Florent Chuffart | # compute clusters |
280 | 6b5a528c | Florent Chuffart | clusters = list() |
281 | 6b5a528c | Florent Chuffart | cluster_contents = list() |
282 | 6b5a528c | Florent Chuffart | # Init reader |
283 | 6b5a528c | Florent Chuffart | indexes = c() |
284 | 6b5a528c | Florent Chuffart | track_readers = c() |
285 | 6b5a528c | Florent Chuffart | current_nuc = NULL |
286 | 7e2d37e1 | Florent Chuffart | llr_score = llr_thres + 1 |
287 | 55c1cdff | Florent Chuffart | # Read nucs from TF outputs |
288 | 6b5a528c | Florent Chuffart | tf_outs = list() |
289 | 6b5a528c | Florent Chuffart | i = 1 |
290 | 6b5a528c | Florent Chuffart | for (sample in samples) { |
291 | 6b5a528c | Florent Chuffart | tf_outs[[i]] = sample$outputs |
292 | 55c1cdff | Florent Chuffart | tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),] |
293 | 6b5a528c | Florent Chuffart | indexes[i] = 1 |
294 | 6b5a528c | Florent Chuffart | if (is.na(tf_outs[[i]][indexes[i],]$center)) { |
295 | 5bfac5a3 | Florent Chuffart | track_readers[i] = coord_max |
296 | 6b5a528c | Florent Chuffart | } else { |
297 | 5bfac5a3 | Florent Chuffart | track_readers[i] = tf_outs[[i]][indexes[i],]$center |
298 | 6b5a528c | Florent Chuffart | } |
299 | 5bfac5a3 | Florent Chuffart | i = i+1 |
300 | 6b5a528c | Florent Chuffart | } |
301 | d973538c | Florent Chuffart | nb_tracks = length(tf_outs) |
302 | 6b5a528c | Florent Chuffart | # print(track_readers) |
303 | 6b5a528c | Florent Chuffart | new_cluster = NULL |
304 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = 0 |
305 | 6b5a528c | Florent Chuffart | nuc_from_track = c() |
306 | d973538c | Florent Chuffart | for (i in 1:nb_tracks){ |
307 | 6b5a528c | Florent Chuffart | nuc_from_track[i] = FALSE |
308 | 6b5a528c | Florent Chuffart | } |
309 | 6b5a528c | Florent Chuffart | # Start clustering |
310 | 6b5a528c | Florent Chuffart | while (!end_of_tracks(track_readers)) { |
311 | 6b5a528c | Florent Chuffart | new_center = min(track_readers) |
312 | 5bfac5a3 | Florent Chuffart | current_track = which(track_readers == new_center)[1] |
313 | 6b5a528c | Florent Chuffart | new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],]) |
314 | 6b5a528c | Florent Chuffart | new_nuc$chr = substr(new_nuc$chr,4,1000000L) |
315 | 6b5a528c | Florent Chuffart | new_nuc$inputs = samples[[current_track]]$inputs |
316 | 6b5a528c | Florent Chuffart | new_nuc$chr = samples[[current_track]]$roi$chr |
317 | 6b5a528c | Florent Chuffart | new_nuc$track = current_track |
318 | 5bfac5a3 | Florent Chuffart | |
319 | 55c1cdff | Florent Chuffart | new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width) |
320 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width) |
321 | 6b5a528c | Florent Chuffart | new_nuc$original_reads = flatted_reads[[3]] |
322 | 6b5a528c | Florent Chuffart | |
323 | 6b5a528c | Florent Chuffart | new_upper_bound = new_nuc$upper_bound |
324 | 6b5a528c | Florent Chuffart | |
325 | 6b5a528c | Florent Chuffart | if (!is.null(current_nuc)) { |
326 | 7e2d37e1 | Florent Chuffart | llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads)) |
327 | 7e2d37e1 | Florent Chuffart | llr_scores = c(llr_scores,llr_score) |
328 | 6b5a528c | Florent Chuffart | } |
329 | 7e2d37e1 | Florent Chuffart | # print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" ")) |
330 | 7e2d37e1 | Florent Chuffart | if (is.na(llr_score)) { |
331 | 7e2d37e1 | Florent Chuffart | llr_score = llr_thres + 1 |
332 | 6b5a528c | Florent Chuffart | } |
333 | 7e2d37e1 | Florent Chuffart | # Store llr_score |
334 | 7e2d37e1 | Florent Chuffart | new_nuc$llr_score = llr_score |
335 | 7e2d37e1 | Florent Chuffart | if (llr_score < llr_thres) { |
336 | 6b5a528c | Florent Chuffart | # aggregate to current cluster |
337 | 6b5a528c | Florent Chuffart | # update bound |
338 | 6b5a528c | Florent Chuffart | if (new_nuc$upper_bound > new_cluster$upper_bound) { |
339 | 55c1cdff | Florent Chuffart | new_cluster$upper_bound = new_nuc$upper_bound |
340 | 6b5a528c | Florent Chuffart | } |
341 | 6b5a528c | Florent Chuffart | if (new_nuc$lower_bound < new_cluster$lower_bound) { |
342 | 55c1cdff | Florent Chuffart | new_cluster$lower_bound = new_nuc$lower_bound |
343 | 6b5a528c | Florent Chuffart | } |
344 | 6b5a528c | Florent Chuffart | # add nucleosome to current cluster |
345 | 6b5a528c | Florent Chuffart | nuc_from_track[current_track] = TRUE |
346 | 55c1cdff | Florent Chuffart | nb_nucs_in_cluster = nb_nucs_in_cluster + 1 |
347 | 6b5a528c | Florent Chuffart | new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc |
348 | 6b5a528c | Florent Chuffart | } else { |
349 | 6b5a528c | Florent Chuffart | if (!is.null(new_cluster)) { |
350 | 6b5a528c | Florent Chuffart | # store old cluster |
351 | d973538c | Florent Chuffart | clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) |
352 | 6b5a528c | Florent Chuffart | } |
353 | 6b5a528c | Florent Chuffart | # Reinit current cluster composition stuff |
354 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = 0 |
355 | 6b5a528c | Florent Chuffart | nuc_from_track = c() |
356 | d973538c | Florent Chuffart | for (i in 1:nb_tracks){ |
357 | 6b5a528c | Florent Chuffart | nuc_from_track[i] = FALSE |
358 | 6b5a528c | Florent Chuffart | } |
359 | 55c1cdff | Florent Chuffart | # create new cluster |
360 | 6b5a528c | Florent Chuffart | new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list()) |
361 | 6b5a528c | Florent Chuffart | # update upper bound |
362 | 6b5a528c | Florent Chuffart | current_upper_bound = new_upper_bound |
363 | 6b5a528c | Florent Chuffart | # add nucleosome to current cluster |
364 | 55c1cdff | Florent Chuffart | nb_nucs_in_cluster = nb_nucs_in_cluster + 1 |
365 | 6b5a528c | Florent Chuffart | nuc_from_track[current_track] = TRUE |
366 | 6b5a528c | Florent Chuffart | new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc |
367 | 5bfac5a3 | Florent Chuffart | |
368 | 6b5a528c | Florent Chuffart | } |
369 | 5bfac5a3 | Florent Chuffart | |
370 | 6b5a528c | Florent Chuffart | current_nuc = new_nuc |
371 | 6b5a528c | Florent Chuffart | |
372 | 6b5a528c | Florent Chuffart | # update indexes |
373 | 55c1cdff | Florent Chuffart | if (indexes[current_track] < length(tf_outs[[current_track]]$center)) { |
374 | 6b5a528c | Florent Chuffart | indexes[current_track] = indexes[current_track] + 1 |
375 | 6b5a528c | Florent Chuffart | # update track |
376 | 6b5a528c | Florent Chuffart | track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center |
377 | 6b5a528c | Florent Chuffart | } else { |
378 | 6b5a528c | Florent Chuffart | # update track |
379 | 6b5a528c | Florent Chuffart | track_readers[current_track] = coord_max |
380 | 6b5a528c | Florent Chuffart | } |
381 | 6b5a528c | Florent Chuffart | } |
382 | 6b5a528c | Florent Chuffart | # store last cluster |
383 | 6b5a528c | Florent Chuffart | if (!is.null(new_cluster)) { |
384 | 6b5a528c | Florent Chuffart | # store old cluster |
385 | d973538c | Florent Chuffart | clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) |
386 | 6b5a528c | Florent Chuffart | } |
387 | 7e2d37e1 | Florent Chuffart | return(list(clusters, llr_scores)) |
388 | 7e2d37e1 | Florent Chuffart | ### Returns a list of clusterized nucleosomes, and all computed llr scores. |
389 | 5bfac5a3 | Florent Chuffart | }, ex=function(){ |
390 | 6b5a528c | Florent Chuffart | # Dealing with a region of interest |
391 | 6b5a528c | Florent Chuffart | roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301)) |
392 | 6b5a528c | Florent Chuffart | samples = list() |
393 | 6b5a528c | Florent Chuffart | for (i in 1:3) { |
394 | 6b5a528c | Florent Chuffart | # Create TF output |
395 | 6b5a528c | Florent Chuffart | tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9) |
396 | 6b5a528c | Florent Chuffart | outputs = dfadd(NULL,tf_nuc) |
397 | 6b5a528c | Florent Chuffart | outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end) |
398 | 6b5a528c | Florent Chuffart | # Generate corresponding reads |
399 | 6b5a528c | Florent Chuffart | nb_reads = round(runif(1,170,230)) |
400 | 6b5a528c | Florent Chuffart | reads = round(rnorm(nb_reads, tf_nuc$center,20)) |
401 | 6b5a528c | Florent Chuffart | u_reads = sort(unique(reads)) |
402 | 6b5a528c | Florent Chuffart | strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2)))) |
403 | 6b5a528c | Florent Chuffart | counts = apply(t(u_reads), 2, function(r) { sum(reads == r)}) |
404 | 6b5a528c | Florent Chuffart | shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)}) |
405 | 6b5a528c | Florent Chuffart | u_reads = u_reads + shifts |
406 | 55c1cdff | Florent Chuffart | inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), |
407 | 55c1cdff | Florent Chuffart | "V2" = u_reads, |
408 | 55c1cdff | Florent Chuffart | "V3" = strands, |
409 | 6b5a528c | Florent Chuffart | "V4" = counts), stringsAsFactors=FALSE) |
410 | 6b5a528c | Florent Chuffart | samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs) |
411 | 6b5a528c | Florent Chuffart | } |
412 | 6b5a528c | Florent Chuffart | print(aggregate_intra_strain_nucs(samples)) |
413 | 6b5a528c | Florent Chuffart | }) |
414 | 6b5a528c | Florent Chuffart | |
415 | 7646593d | Florent Chuffart | flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output |
416 | 7646593d | Florent Chuffart | ### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function. |
417 | 7646593d | Florent Chuffart | partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function |
418 | d973538c | Florent Chuffart | cur_index, ##<< the index of the roi involved |
419 | d973538c | Florent Chuffart | nb_tracks=3 ##<< the number of replicates |
420 | 7646593d | Florent Chuffart | ) { |
421 | 7646593d | Florent Chuffart | if (length(partial_strain_maps) == 0 ){ |
422 | 4016229d | Florent Chuffart | print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." )) |
423 | 55c1cdff | Florent Chuffart | tmp_strain_maps = list() |
424 | 7646593d | Florent Chuffart | } else { |
425 | 7646593d | Florent Chuffart | tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){ |
426 | 7646593d | Florent Chuffart | tmp_nuc = partial_strain_maps[[i]] |
427 | 7646593d | Florent Chuffart | tmp_nuc_as_list = list() |
428 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]] |
429 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]]) |
430 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]]) |
431 | 4016229d | Florent Chuffart | tmp_nuc_as_list[["cur_index"]] = cur_index |
432 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["index_nuc"]] = i |
433 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp) |
434 | 7646593d | Florent Chuffart | all_original_reads = c() |
435 | 7646593d | Florent Chuffart | for (j in 1:length(tmp_nuc$nucs)) { |
436 | 7646593d | Florent Chuffart | all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads) |
437 | 7646593d | Florent Chuffart | } |
438 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads) |
439 | 0d5fbf02 | Florent Chuffart | tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs) |
440 | 7646593d | Florent Chuffart | if (tmp_nuc$wp) { |
441 | d973538c | Florent Chuffart | for (i in 1:(nb_tracks-1)) { |
442 | d973538c | Florent Chuffart | tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5) |
443 | d973538c | Florent Chuffart | } |
444 | 7646593d | Florent Chuffart | } else { |
445 | d973538c | Florent Chuffart | for (i in 1:(nb_tracks-1)) { |
446 | d973538c | Florent Chuffart | tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA |
447 | d973538c | Florent Chuffart | } |
448 | 7646593d | Florent Chuffart | } |
449 | 7646593d | Florent Chuffart | return(tmp_nuc_as_list) |
450 | 7646593d | Florent Chuffart | }) |
451 | 7646593d | Florent Chuffart | tmp_strain_maps = do.call("rbind", tmp_strain_map) |
452 | 7646593d | Florent Chuffart | } |
453 | 21b8928f | Florent Chuffart | return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE)) |
454 | 7646593d | Florent Chuffart | ### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function. |
455 | 7646593d | Florent Chuffart | } |
456 | 6b5a528c | Florent Chuffart | |
457 | 6b5a528c | Florent Chuffart | align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains. |
458 | e5603c3f | Florent Chuffart | ### This function aligns nucleosomes between two strains for a given genome region. |
459 | 55c1cdff | Florent Chuffart | replicates, ##<< Set of replicates, ideally 3 per strain. |
460 | e5603c3f | Florent Chuffart | wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed. |
461 | e5603c3f | Florent Chuffart | wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed. |
462 | 6b5a528c | Florent Chuffart | corr_thres=0.5, ##<< Correlation threshold. |
463 | e5603c3f | Florent Chuffart | llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating |
464 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
465 | 5bfac5a3 | Florent Chuffart | ... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed. |
466 | 6b5a528c | Florent Chuffart | ) { |
467 | 6b5a528c | Florent Chuffart | if (length(replicates) < 2) { |
468 | 6b5a528c | Florent Chuffart | stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.") |
469 | 6b5a528c | Florent Chuffart | } else if (length(replicates) > 2) { |
470 | 5bfac5a3 | Florent Chuffart | print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.") |
471 | 6b5a528c | Florent Chuffart | } |
472 | 6b5a528c | Florent Chuffart | common_nuc = NULL |
473 | 7e2d37e1 | Florent Chuffart | llr_scores = c() |
474 | 6b5a528c | Florent Chuffart | chr = replicates[[1]][[1]]$roi$chr |
475 | 6b5a528c | Florent Chuffart | min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end) |
476 | 6b5a528c | Florent Chuffart | max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end) |
477 | 6b5a528c | Florent Chuffart | strain_ref1 = replicates[[1]][[1]]$strain |
478 | 6b5a528c | Florent Chuffart | strain_ref2 = replicates[[2]][[1]]$strain |
479 | 4016229d | Florent Chuffart | big_cur = replicates[[1]][[1]]$roi |
480 | 4016229d | Florent Chuffart | orig_big_cur = replicates[[1]][[1]]$orig_roi |
481 | 4016229d | Florent Chuffart | if(big_cur$end - big_cur$begin < 0) { |
482 | 4016229d | Florent Chuffart | tmp_begin = big_cur$begin |
483 | 4016229d | Florent Chuffart | big_cur$begin = big_cur$end |
484 | 4016229d | Florent Chuffart | big_cur$end = tmp_begin |
485 | 6b5a528c | Florent Chuffart | } |
486 | 6b5a528c | Florent Chuffart | # GO! |
487 | 6b5a528c | Florent Chuffart | if (is.null(wp_nucs_strain_ref1)) { |
488 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]] |
489 | 6b5a528c | Florent Chuffart | } |
490 | 6b5a528c | Florent Chuffart | if (is.null(wp_nucs_strain_ref2)) { |
491 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]] |
492 | 6b5a528c | Florent Chuffart | } |
493 | 6b5a528c | Florent Chuffart | lws = c() |
494 | 6b5a528c | Florent Chuffart | ups = c() |
495 | 6b5a528c | Florent Chuffart | for (na in wp_nucs_strain_ref2) { |
496 | 6b5a528c | Florent Chuffart | lws = c(lws, na$lower_bound) |
497 | 6b5a528c | Florent Chuffart | ups = c(ups, na$upper_bound) |
498 | 6b5a528c | Florent Chuffart | } |
499 | 6b5a528c | Florent Chuffart | |
500 | 5bfac5a3 | Florent Chuffart | print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep="")) |
501 | 6b5a528c | Florent Chuffart | roi_strain_ref1 = NULL |
502 | 6b5a528c | Florent Chuffart | roi_strain_ref2 = NULL |
503 | 6b5a528c | Florent Chuffart | if (length(wp_nucs_strain_ref1) > 0) { |
504 | 6b5a528c | Florent Chuffart | for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){ |
505 | 6b5a528c | Florent Chuffart | # print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep="")) |
506 | 6b5a528c | Florent Chuffart | nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]] |
507 | 6b5a528c | Florent Chuffart | # Filtering on Well Positionned |
508 | 883439d6 | Florent Chuffart | if (nuc_strain_ref1$wp) { |
509 | 6b5a528c | Florent Chuffart | roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1) |
510 | 4016229d | Florent Chuffart | roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config) |
511 | 1d833b97 | Florent Chuffart | if (!is.null(roi_strain_ref2)){ |
512 | 6b5a528c | Florent Chuffart | # LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER) |
513 | 6b5a528c | Florent Chuffart | lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin) |
514 | 6b5a528c | Florent Chuffart | upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin) |
515 | 6b5a528c | Florent Chuffart | matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2) |
516 | 6b5a528c | Florent Chuffart | for (index_nuc_strain_ref2 in matching_nas) { |
517 | 6b5a528c | Florent Chuffart | nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]] |
518 | 6b5a528c | Florent Chuffart | # Filtering on Well Positionned |
519 | 55c1cdff | Florent Chuffart | nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2) |
520 | 4016229d | Florent Chuffart | if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) & |
521 | 1e34bb1f | Florent Chuffart | nuc_strain_ref2$wp) { |
522 | 5bfac5a3 | Florent Chuffart | # Filtering on correlation Score and collecting reads |
523 | 6b5a528c | Florent Chuffart | SKIP = FALSE |
524 | 6b5a528c | Florent Chuffart | # TODO: This for loop could be done before working on strain_ref2. Isn't it? |
525 | 6b5a528c | Florent Chuffart | reads_strain_ref1 = c() |
526 | 6b5a528c | Florent Chuffart | for (nuc in nuc_strain_ref1$nucs){ |
527 | 5bfac5a3 | Florent Chuffart | reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads) |
528 | 6b5a528c | Florent Chuffart | if (nuc$corr < corr_thres) { |
529 | 6b5a528c | Florent Chuffart | SKIP = TRUE |
530 | 6b5a528c | Florent Chuffart | } |
531 | 6b5a528c | Florent Chuffart | } |
532 | 6b5a528c | Florent Chuffart | reads_strain_ref2 = c() |
533 | 6b5a528c | Florent Chuffart | for (nuc in nuc_strain_ref2$nucs){ |
534 | 5bfac5a3 | Florent Chuffart | reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads) |
535 | 6b5a528c | Florent Chuffart | if (nuc$corr < corr_thres) { |
536 | 6b5a528c | Florent Chuffart | SKIP = TRUE |
537 | 6b5a528c | Florent Chuffart | } |
538 | 6b5a528c | Florent Chuffart | } |
539 | 5bfac5a3 | Florent Chuffart | # Filtering on correlation Score |
540 | 6b5a528c | Florent Chuffart | if (!SKIP) { |
541 | 6b5a528c | Florent Chuffart | # tranlation of reads into strain 2 coords |
542 | 6b5a528c | Florent Chuffart | diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2 |
543 | 6b5a528c | Florent Chuffart | reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1)) |
544 | 7e2d37e1 | Florent Chuffart | llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2)) |
545 | 7e2d37e1 | Florent Chuffart | llr_scores = c(llr_scores, llr_score) |
546 | e5603c3f | Florent Chuffart | # Filtering on LLR Score |
547 | 67157457 | Florent Chuffart | if (llr_score < llr_thres) { |
548 | 6b5a528c | Florent Chuffart | tmp_nuc = list() |
549 | 6b5a528c | Florent Chuffart | # strain_ref1 |
550 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr |
551 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound |
552 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound |
553 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5) |
554 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5) |
555 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1) |
556 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1 |
557 | 6b5a528c | Florent Chuffart | # strain_ref2 |
558 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr |
559 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound |
560 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound |
561 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5) |
562 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5) |
563 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2) |
564 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2 |
565 | 6b5a528c | Florent Chuffart | # common |
566 | 7e2d37e1 | Florent Chuffart | tmp_nuc[["llr_score"]] = signif(llr_score,5) |
567 | dadb6a4d | Florent Chuffart | tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5) |
568 | dadb6a4d | Florent Chuffart | tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2) |
569 | 6b5a528c | Florent Chuffart | common_nuc = dfadd(common_nuc, tmp_nuc) |
570 | ec2936ea | Florent Chuffart | } |
571 | 6b5a528c | Florent Chuffart | } |
572 | 6b5a528c | Florent Chuffart | } |
573 | 5bfac5a3 | Florent Chuffart | } |
574 | 6b5a528c | Florent Chuffart | } else { |
575 | 6b5a528c | Florent Chuffart | print("WARNING! No roi for strain ref 2.") |
576 | 6b5a528c | Florent Chuffart | } |
577 | 6b5a528c | Florent Chuffart | } |
578 | 6b5a528c | Florent Chuffart | } |
579 | 5bfac5a3 | Florent Chuffart | |
580 | 67157457 | Florent Chuffart | if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) { |
581 | 67157457 | Florent Chuffart | index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] == common_nuc[,1:3][-1,] ,1,sum) == 3) |
582 | 67157457 | Florent Chuffart | to_remove_list = c() |
583 | 67157457 | Florent Chuffart | for (i in 1:length(index_redundant)) { |
584 | 67157457 | Florent Chuffart | if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) { |
585 | 67157457 | Florent Chuffart | to_remove = index_redundant[i] |
586 | 67157457 | Florent Chuffart | } else { |
587 | 67157457 | Florent Chuffart | to_remove = index_redundant[i] + 1 |
588 | 67157457 | Florent Chuffart | } |
589 | 67157457 | Florent Chuffart | to_remove_list = c(to_remove_list, to_remove) |
590 | 67157457 | Florent Chuffart | } |
591 | 67157457 | Florent Chuffart | common_nuc = common_nuc[-to_remove_list,] |
592 | 67157457 | Florent Chuffart | } |
593 | 5bfac5a3 | Florent Chuffart | |
594 | 67157457 | Florent Chuffart | if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) { |
595 | 67157457 | Florent Chuffart | index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3) |
596 | 67157457 | Florent Chuffart | to_remove_list = c() |
597 | 67157457 | Florent Chuffart | for (i in 1:length(index_redundant)) { |
598 | 67157457 | Florent Chuffart | if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) { |
599 | 67157457 | Florent Chuffart | to_remove = index_redundant[i] |
600 | 67157457 | Florent Chuffart | } else { |
601 | 67157457 | Florent Chuffart | to_remove = index_redundant[i] + 1 |
602 | 67157457 | Florent Chuffart | } |
603 | 67157457 | Florent Chuffart | to_remove_list = c(to_remove_list, to_remove) |
604 | 67157457 | Florent Chuffart | } |
605 | 67157457 | Florent Chuffart | common_nuc = common_nuc[-to_remove_list,] |
606 | 67157457 | Florent Chuffart | } |
607 | 5bfac5a3 | Florent Chuffart | |
608 | 7e2d37e1 | Florent Chuffart | return(list(common_nuc, llr_scores)) |
609 | 6b5a528c | Florent Chuffart | } else { |
610 | 6b5a528c | Florent Chuffart | print("WARNING, no nucs for strain_ref1.") |
611 | 6b5a528c | Florent Chuffart | return(NULL) |
612 | 6b5a528c | Florent Chuffart | } |
613 | 7e2d37e1 | Florent Chuffart | ### Returns a list of clusterized nucleosomes, and all computed llr scores. |
614 | 5bfac5a3 | Florent Chuffart | }, ex=function(){ |
615 | 1d833b97 | Florent Chuffart | |
616 | 4016229d | Florent Chuffart | # Define new translate_cur function... |
617 | 4016229d | Florent Chuffart | translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) { |
618 | 1d833b97 | Florent Chuffart | return(roi) |
619 | 1d833b97 | Florent Chuffart | } |
620 | 1d833b97 | Florent Chuffart | # Binding it by uncomment follwing lines. |
621 | 4016229d | Florent Chuffart | unlockBinding("translate_cur", as.environment("package:nucleominer")) |
622 | 4016229d | Florent Chuffart | unlockBinding("translate_cur", getNamespace("nucleominer")) |
623 | 4016229d | Florent Chuffart | assign("translate_cur", translate_cur, "package:nucleominer") |
624 | 4016229d | Florent Chuffart | assign("translate_cur", translate_cur, getNamespace("nucleominer")) |
625 | 4016229d | Florent Chuffart | lockBinding("translate_cur", getNamespace("nucleominer")) |
626 | 4016229d | Florent Chuffart | lockBinding("translate_cur", as.environment("package:nucleominer")) |
627 | 1d833b97 | Florent Chuffart | |
628 | 6b5a528c | Florent Chuffart | # Dealing with a region of interest |
629 | 6b5a528c | Florent Chuffart | roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1") |
630 | 4016229d | Florent Chuffart | roi2 = translate_cur(roi, roi$strain_ref1) |
631 | 6b5a528c | Florent Chuffart | replicates = list() |
632 | 6b5a528c | Florent Chuffart | for (j in 1:2) { |
633 | 6b5a528c | Florent Chuffart | samples = list() |
634 | 6b5a528c | Florent Chuffart | for (i in 1:3) { |
635 | 6b5a528c | Florent Chuffart | # Create TF output |
636 | 6b5a528c | Florent Chuffart | tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9) |
637 | 6b5a528c | Florent Chuffart | outputs = dfadd(NULL,tf_nuc) |
638 | 6b5a528c | Florent Chuffart | outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end) |
639 | 6b5a528c | Florent Chuffart | # Generate corresponding reads |
640 | 6b5a528c | Florent Chuffart | nb_reads = round(runif(1,170,230)) |
641 | 6b5a528c | Florent Chuffart | reads = round(rnorm(nb_reads, tf_nuc$center,20)) |
642 | 6b5a528c | Florent Chuffart | u_reads = sort(unique(reads)) |
643 | 6b5a528c | Florent Chuffart | strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2)))) |
644 | 6b5a528c | Florent Chuffart | counts = apply(t(u_reads), 2, function(r) { sum(reads == r)}) |
645 | 6b5a528c | Florent Chuffart | shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)}) |
646 | 6b5a528c | Florent Chuffart | u_reads = u_reads + shifts |
647 | 55c1cdff | Florent Chuffart | inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), |
648 | 55c1cdff | Florent Chuffart | "V2" = u_reads, |
649 | 55c1cdff | Florent Chuffart | "V3" = strands, |
650 | 6b5a528c | Florent Chuffart | "V4" = counts), stringsAsFactors=FALSE) |
651 | 6b5a528c | Florent Chuffart | samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs) |
652 | 6b5a528c | Florent Chuffart | } |
653 | 6b5a528c | Florent Chuffart | replicates[[length(replicates) + 1]] = samples |
654 | 6b5a528c | Florent Chuffart | } |
655 | 6b5a528c | Florent Chuffart | print(align_inter_strain_nucs(replicates)) |
656 | 6b5a528c | Florent Chuffart | }) |
657 | 6b5a528c | Florent Chuffart | |
658 | 6b5a528c | Florent Chuffart | |
659 | 6b5a528c | Florent Chuffart | |
660 | 6b5a528c | Florent Chuffart | |
661 | 6b5a528c | Florent Chuffart | |
662 | 6b5a528c | Florent Chuffart | |
663 | 6b5a528c | Florent Chuffart | |
664 | 6b5a528c | Florent Chuffart | |
665 | 6b5a528c | Florent Chuffart | |
666 | 6b5a528c | Florent Chuffart | |
667 | 6b5a528c | Florent Chuffart | |
668 | 6b5a528c | Florent Chuffart | |
669 | 6b5a528c | Florent Chuffart | |
670 | 6b5a528c | Florent Chuffart | |
671 | 6b5a528c | Florent Chuffart | |
672 | 6b5a528c | Florent Chuffart | |
673 | 6b5a528c | Florent Chuffart | |
674 | 6b5a528c | Florent Chuffart | |
675 | 6b5a528c | Florent Chuffart | |
676 | 6b5a528c | Florent Chuffart | |
677 | 6b5a528c | Florent Chuffart | |
678 | 6b5a528c | Florent Chuffart | |
679 | 6b5a528c | Florent Chuffart | |
680 | 6b5a528c | Florent Chuffart | |
681 | 6b5a528c | Florent Chuffart | |
682 | 6b5a528c | Florent Chuffart | |
683 | 6b5a528c | Florent Chuffart | fetch_mnase_replicates = function(# Prefetch data |
684 | 6b5a528c | Florent Chuffart | ### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates. |
685 | 6b5a528c | Florent Chuffart | strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids. |
686 | 6b5a528c | Florent Chuffart | roi, ##<< Region of interest. |
687 | 6b5a528c | Florent Chuffart | all_samples, ##<< Global list of samples. |
688 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
689 | 6b5a528c | Florent Chuffart | only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking. |
690 | 6b5a528c | Florent Chuffart | get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence. |
691 | 6b5a528c | Florent Chuffart | get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files. |
692 | 6b5a528c | Florent Chuffart | ) { |
693 | 6b5a528c | Florent Chuffart | samples=list() |
694 | 6b5a528c | Florent Chuffart | samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id) |
695 | 6b5a528c | Florent Chuffart | for (i in samples_ids) { |
696 | 6b5a528c | Florent Chuffart | sample = as.list(all_samples[all_samples$id==i,]) |
697 | a0b91fee | Florent Chuffart | sample$orig_roi = roi |
698 | 4016229d | Florent Chuffart | sample$roi = translate_cur(roi, sample$strain, config = config) |
699 | 6b5a528c | Florent Chuffart | if (get_genome) { |
700 | 6b5a528c | Florent Chuffart | # Get Genome |
701 | 5bfac5a3 | Florent Chuffart | sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
702 | 6b5a528c | Florent Chuffart | } |
703 | 6b5a528c | Florent Chuffart | # Get inputs |
704 | 55c1cdff | Florent Chuffart | sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) |
705 | 5bfac5a3 | Florent Chuffart | sample$total_reads = sum(sample$inputs[,4]) |
706 | 6b5a528c | Florent Chuffart | if (!only_fetch) { |
707 | 6b5a528c | Florent Chuffart | sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
708 | 6b5a528c | Florent Chuffart | } |
709 | 6b5a528c | Florent Chuffart | # Get TF outputs for Mnase_Seq samples |
710 | 5bfac5a3 | Florent Chuffart | if (sample$marker == "Mnase_Seq" & get_ouputs) { |
711 | a52f85d7 | Florent Chuffart | sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t") |
712 | 6b5a528c | Florent Chuffart | if (!only_fetch) { |
713 | 6b5a528c | Florent Chuffart | sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
714 | 6b5a528c | Florent Chuffart | } |
715 | 6b5a528c | Florent Chuffart | } |
716 | 5bfac5a3 | Florent Chuffart | samples[[length(samples) + 1]] = sample |
717 | 6b5a528c | Florent Chuffart | } |
718 | 6b5a528c | Florent Chuffart | return(samples) |
719 | 6b5a528c | Florent Chuffart | } |
720 | 6b5a528c | Florent Chuffart | |
721 | 6b5a528c | Florent Chuffart | substract_region = function(# Substract to a list of regions an other list of regions that intersect it. |
722 | 6b5a528c | Florent Chuffart | ### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two. |
723 | 55c1cdff | Florent Chuffart | region1, ##<< Original regions. |
724 | 6b5a528c | Florent Chuffart | region2 ##<< Regions to substract. |
725 | 6b5a528c | Florent Chuffart | ) { |
726 | 6b5a528c | Florent Chuffart | rec_substract_region = function(region1, region2) { |
727 | 6b5a528c | Florent Chuffart | non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) { |
728 | 6b5a528c | Florent Chuffart | cur_fuzzy = region1[i,] |
729 | 6b5a528c | Florent Chuffart | inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,] |
730 | 6b5a528c | Florent Chuffart | if (length(inter_wp[,1]) > 0) { |
731 | 6b5a528c | Florent Chuffart | ret = c() |
732 | 6b5a528c | Florent Chuffart | for (j in 1:length(inter_wp[,1])) { |
733 | 6b5a528c | Florent Chuffart | cur_wp = inter_wp[j,] |
734 | 6b5a528c | Florent Chuffart | if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) { |
735 | 6b5a528c | Florent Chuffart | # remove cur_fuzzy |
736 | 6b5a528c | Florent Chuffart | ret = c() |
737 | 6b5a528c | Florent Chuffart | break |
738 | 6b5a528c | Florent Chuffart | } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) { |
739 | 55c1cdff | Florent Chuffart | # crop fuzzy |
740 | 55c1cdff | Florent Chuffart | cur_fuzzy$lower_bound = cur_wp$upper_bound + 1 |
741 | 6b5a528c | Florent Chuffart | ret = cur_fuzzy |
742 | 6b5a528c | Florent Chuffart | } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) { |
743 | 55c1cdff | Florent Chuffart | # crop fuzzy |
744 | 55c1cdff | Florent Chuffart | cur_fuzzy$upper_bound = cur_wp$lower_bound - 1 |
745 | 6b5a528c | Florent Chuffart | ret = cur_fuzzy |
746 | 6b5a528c | Florent Chuffart | } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) { |
747 | 6b5a528c | Florent Chuffart | # split fuzzy |
748 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_1 = cur_fuzzy |
749 | 55c1cdff | Florent Chuffart | tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1 |
750 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_2 = cur_fuzzy |
751 | 55c1cdff | Florent Chuffart | tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1 |
752 | 6b5a528c | Florent Chuffart | ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp) |
753 | 6b5a528c | Florent Chuffart | # print(ret) |
754 | 6b5a528c | Florent Chuffart | # ret = cur_fuzzy |
755 | 6b5a528c | Florent Chuffart | break |
756 | 6b5a528c | Florent Chuffart | } else { |
757 | 6b5a528c | Florent Chuffart | stop("WARNING NO ADAPTED CASE!") |
758 | 55c1cdff | Florent Chuffart | } |
759 | 6b5a528c | Florent Chuffart | } |
760 | 6b5a528c | Florent Chuffart | return(ret) |
761 | 6b5a528c | Florent Chuffart | } else { |
762 | 6b5a528c | Florent Chuffart | return(cur_fuzzy) |
763 | 6b5a528c | Florent Chuffart | } |
764 | 6b5a528c | Florent Chuffart | }) |
765 | 6b5a528c | Florent Chuffart | } |
766 | 21b8928f | Florent Chuffart | non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4]) |
767 | 6b5a528c | Florent Chuffart | if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)} |
768 | b20637ed | Florent Chuffart | tmp_ulist = unlist(non_inter_fuzzy) |
769 | b20637ed | Florent Chuffart | tmp_names = names(tmp_ulist)[1:4] |
770 | 55c1cdff | Florent Chuffart | non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE) |
771 | b20637ed | Florent Chuffart | names(non_inter_fuzzy) = tmp_names |
772 | 55c1cdff | Florent Chuffart | non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr) |
773 | 55c1cdff | Florent Chuffart | non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr) |
774 | b20637ed | Florent Chuffart | non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound) |
775 | b20637ed | Florent Chuffart | non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound) |
776 | 55c1cdff | Florent Chuffart | non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),] |
777 | b20637ed | Florent Chuffart | return(non_inter_fuzzy) |
778 | 6b5a528c | Florent Chuffart | } |
779 | 6b5a528c | Florent Chuffart | |
780 | e5603c3f | Florent Chuffart | union_regions = function(# Aggregate regions that intersect themselves. |
781 | 55c1cdff | Florent Chuffart | ### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable. |
782 | 6b5a528c | Florent Chuffart | regions ##<< The Regions to be aggregated |
783 | 6b5a528c | Florent Chuffart | ) { |
784 | 5bfac5a3 | Florent Chuffart | if (is.null(regions)) {return(regions)} |
785 | 0303dbe8 | Florent Chuffart | if (nrow(regions) == 0) {return(regions)} |
786 | 6b5a528c | Florent Chuffart | old_length = length(regions[,1]) |
787 | 6b5a528c | Florent Chuffart | new_length = 0 |
788 | 55c1cdff | Florent Chuffart | |
789 | 6b5a528c | Florent Chuffart | while (old_length != new_length) { |
790 | 6b5a528c | Florent Chuffart | regions = regions[order(regions$lower_bound), ] |
791 | 55c1cdff | Florent Chuffart | regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE) |
792 | 6b5a528c | Florent Chuffart | vec_end_1 = which(regions$stop) |
793 | 6b5a528c | Florent Chuffart | if (length(vec_end_1) == 0) { |
794 | 55c1cdff | Florent Chuffart | vec_end_1 = c(length(regions$stop)) |
795 | 55c1cdff | Florent Chuffart | } |
796 | 6b5a528c | Florent Chuffart | if (vec_end_1[length(vec_end_1)] != length(regions$stop)) { |
797 | 6b5a528c | Florent Chuffart | vec_end_1 = c(vec_end_1, length(regions$stop)) |
798 | 6b5a528c | Florent Chuffart | } |
799 | 6b5a528c | Florent Chuffart | vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1) |
800 | 6b5a528c | Florent Chuffart | union = apply(t(1:length(vec_beg_1)), 2, function(i) { |
801 | 6b5a528c | Florent Chuffart | chr = regions$chr[vec_beg_1[i]] |
802 | 6b5a528c | Florent Chuffart | lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]]) |
803 | 6b5a528c | Florent Chuffart | upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]]) |
804 | 4016229d | Florent Chuffart | cur_index = regions$cur_index[vec_beg_1[i]] |
805 | 4016229d | Florent Chuffart | data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index)) |
806 | 6b5a528c | Florent Chuffart | }) |
807 | 59ad95ca | Florent Chuffart | union = collapse_regions(union) |
808 | 6b5a528c | Florent Chuffart | old_length = length(regions[,1]) |
809 | 6b5a528c | Florent Chuffart | new_length = length(union[,1]) |
810 | 6b5a528c | Florent Chuffart | regions = union |
811 | 6b5a528c | Florent Chuffart | } |
812 | 6b5a528c | Florent Chuffart | return(union) |
813 | 6b5a528c | Florent Chuffart | } |
814 | 6b5a528c | Florent Chuffart | |
815 | 0303dbe8 | Florent Chuffart | # remove_aligned_wp = function(# Remove wp nucs from common nucs list. |
816 | 0303dbe8 | Florent Chuffart | # ### It is based on common wp nucs index on nucs and region. |
817 | 0303dbe8 | Florent Chuffart | # strain_maps, ##<< Nuc maps. |
818 | 4016229d | Florent Chuffart | # cur_index, ##<< The region of interest index. |
819 | 0303dbe8 | Florent Chuffart | # tmp_common_nucs, ##<< the list of wp nucs. |
820 | 0303dbe8 | Florent Chuffart | # strain##<< The strain to consider. |
821 | 0303dbe8 | Florent Chuffart | # ){ |
822 | 0303dbe8 | Florent Chuffart | # fuzzy_nucs = strain_maps[[strain]] |
823 | 4016229d | Florent Chuffart | # fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,] |
824 | 0303dbe8 | Florent Chuffart | # fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),] |
825 | 0303dbe8 | Florent Chuffart | # if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)} |
826 | 0303dbe8 | Florent Chuffart | # if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"} |
827 | 0303dbe8 | Florent Chuffart | # anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]] |
828 | 0303dbe8 | Florent Chuffart | # fuzzy_nucs = fuzzy_nucs[-anti_index_1,] |
829 | 0303dbe8 | Florent Chuffart | # return(fuzzy_nucs) |
830 | 0303dbe8 | Florent Chuffart | # } |
831 | 6b5a528c | Florent Chuffart | |
832 | 6b5a528c | Florent Chuffart | translate_regions = function(# Translate a list of regions from a strain ref to another. |
833 | e5603c3f | Florent Chuffart | ### This function is an elaborated call to translate_cur. |
834 | 55c1cdff | Florent Chuffart | regions, ##<< Regions to be translated. |
835 | 6b5a528c | Florent Chuffart | combi, ##<< Combination of strains. |
836 | 4016229d | Florent Chuffart | cur_index, ##<< The region of interest index. |
837 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
838 | 6b5a528c | Florent Chuffart | roi ##<< The region of interest. |
839 | 6b5a528c | Florent Chuffart | ) { |
840 | 6b5a528c | Florent Chuffart | tr_regions = apply(t(1:length(regions[,1])), 2, function(i) { |
841 | 6b5a528c | Florent Chuffart | tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2]) |
842 | 4016229d | Florent Chuffart | big_cur = roi |
843 | 4016229d | Florent Chuffart | trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur) |
844 | 5bfac5a3 | Florent Chuffart | if (is.null(trs_tmp_regions_ref2)) { |
845 | 5bfac5a3 | Florent Chuffart | return(NULL) |
846 | 5bfac5a3 | Florent Chuffart | } else { |
847 | 4016229d | Florent Chuffart | return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index))) |
848 | 5bfac5a3 | Florent Chuffart | } |
849 | 5bfac5a3 | Florent Chuffart | }) |
850 | 5bfac5a3 | Florent Chuffart | |
851 | 59ad95ca | Florent Chuffart | return(collapse_regions(tr_regions)) |
852 | 59ad95ca | Florent Chuffart | } |
853 | 59ad95ca | Florent Chuffart | |
854 | 59ad95ca | Florent Chuffart | collapse_regions = function(# reformat an "apply manipulated" list of regions |
855 | 59ad95ca | Florent Chuffart | ### Utils to reformat an "apply manipulated" list of regions |
856 | 59ad95ca | Florent Chuffart | regions ##< a list of regions |
857 | 59ad95ca | Florent Chuffart | ) { |
858 | 5bfac5a3 | Florent Chuffart | if (is.null(regions)) { |
859 | 5bfac5a3 | Florent Chuffart | return(NULL) |
860 | 5bfac5a3 | Florent Chuffart | } else { |
861 | 5bfac5a3 | Florent Chuffart | regions = do.call(rbind, regions) |
862 | 5bfac5a3 | Florent Chuffart | regions$chr = as.character(regions$chr) |
863 | 5bfac5a3 | Florent Chuffart | regions$chr = as.numeric(regions$chr) |
864 | 5bfac5a3 | Florent Chuffart | regions$lower_bound = as.numeric(regions$lower_bound) |
865 | 5bfac5a3 | Florent Chuffart | regions$upper_bound = as.numeric(regions$upper_bound) |
866 | 5bfac5a3 | Florent Chuffart | regions = regions[order(regions$lower_bound),] |
867 | 5bfac5a3 | Florent Chuffart | return(regions) |
868 | 5bfac5a3 | Florent Chuffart | } |
869 | 6b5a528c | Florent Chuffart | } |
870 | 6b5a528c | Florent Chuffart | |
871 | 6b5a528c | Florent Chuffart | |
872 | 6b5a528c | Florent Chuffart | crop_fuzzy = function(# Crop bound of regions according to region of interest bound |
873 | 4016229d | Florent Chuffart | ### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function. |
874 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs, ##<< the regiuons to be croped. |
875 | 6b5a528c | Florent Chuffart | roi, ##<< The region of interest. |
876 | 1d833b97 | Florent Chuffart | strain, ##<< The strain to consider. |
877 | 1d833b97 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
878 | 6b5a528c | Florent Chuffart | ) { |
879 | 4016229d | Florent Chuffart | tr_roi = translate_cur(roi, strain, config = config) |
880 | 6b5a528c | Florent Chuffart | tr_roi_begin = min(tr_roi$begin, tr_roi$end) |
881 | 6b5a528c | Florent Chuffart | tr_roi_end = max(tr_roi$begin, tr_roi$end) |
882 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) { |
883 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin |
884 | 6b5a528c | Florent Chuffart | } |
885 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) { |
886 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin |
887 | 6b5a528c | Florent Chuffart | } |
888 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) { |
889 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end |
890 | 6b5a528c | Florent Chuffart | } |
891 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) { |
892 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end |
893 | 6b5a528c | Florent Chuffart | } |
894 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,] |
895 | 6b5a528c | Florent Chuffart | return(tmp_fuzzy_nucs) |
896 | 6b5a528c | Florent Chuffart | } |
897 | 6b5a528c | Florent Chuffart | |
898 | 55c1cdff | Florent Chuffart | get_all_reads = function(# Retrieve Reads |
899 | 6b5a528c | Florent Chuffart | ### Retrieve reads for a given marker, combi, form. |
900 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
901 | 6b5a528c | Florent Chuffart | combi, ##<< The starin combination to considere. |
902 | cc54c799 | Florent Chuffart | form="wp", ##<< The nuc form to considere. |
903 | cc54c799 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
904 | 6b5a528c | Florent Chuffart | ) { |
905 | 6b5a528c | Florent Chuffart | all_reads = NULL |
906 | 6b5a528c | Florent Chuffart | for (manip in c("Mnase_Seq", marker)) { |
907 | 4016229d | Florent Chuffart | if (form == "unr") { |
908 | 4016229d | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="") |
909 | 5bfac5a3 | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
910 | 6b5a528c | Florent Chuffart | tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,] |
911 | 6b5a528c | Florent Chuffart | tmp_res$form = form |
912 | 6b5a528c | Florent Chuffart | } else if (form == "wp") { |
913 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="") |
914 | 5bfac5a3 | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
915 | 6b5a528c | Florent Chuffart | tmp_res$form = form |
916 | 4016229d | Florent Chuffart | } else if (form == "wpunr") { |
917 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="") |
918 | 5bfac5a3 | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
919 | 6b5a528c | Florent Chuffart | tmp_res$form = "wp" |
920 | 4016229d | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="") |
921 | 5bfac5a3 | Florent Chuffart | tmp_res2 = read.table(file=out_filename, header=TRUE) |
922 | 6b5a528c | Florent Chuffart | tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,] |
923 | 4016229d | Florent Chuffart | tmp_res2$form = "unr" |
924 | 6b5a528c | Florent Chuffart | tmp_res = rbind(tmp_res, tmp_res2) |
925 | 6b5a528c | Florent Chuffart | } |
926 | 6b5a528c | Florent Chuffart | if (is.null(all_reads)) { |
927 | 6b5a528c | Florent Chuffart | all_reads = tmp_res[,c(1:9,length(tmp_res))] |
928 | 55c1cdff | Florent Chuffart | } |
929 | 6b5a528c | Florent Chuffart | tmp_res = tmp_res[,-c(1:9,length(tmp_res))] |
930 | 6b5a528c | Florent Chuffart | all_reads = cbind(all_reads, tmp_res) |
931 | 6b5a528c | Florent Chuffart | } |
932 | 5bfac5a3 | Florent Chuffart | return(all_reads) |
933 | 6b5a528c | Florent Chuffart | } |
934 | 6b5a528c | Florent Chuffart | |
935 | e5603c3f | Florent Chuffart | get_design = function(# Build the design for DESeq |
936 | 6b5a528c | Florent Chuffart | ### This function build the design according sample properties. |
937 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
938 | 6b5a528c | Florent Chuffart | combi, ##<< The starin combination to considere. |
939 | 6b5a528c | Florent Chuffart | all_samples ##<< Global list of samples. |
940 | 6b5a528c | Florent Chuffart | ) { |
941 | 6b5a528c | Florent Chuffart | off1 = 0 |
942 | 6b5a528c | Florent Chuffart | off2 = 0 |
943 | 6b5a528c | Florent Chuffart | manips = c("Mnase_Seq", marker) |
944 | 6b5a528c | Florent Chuffart | design_rownames = c() |
945 | 6b5a528c | Florent Chuffart | design_manip = c() |
946 | 6b5a528c | Florent Chuffart | design_strain = c() |
947 | 5bfac5a3 | Florent Chuffart | off2index = function(off) { |
948 | 55c1cdff | Florent Chuffart | switch(toString(off), |
949 | 6b5a528c | Florent Chuffart | "1"=c(0,1,1), |
950 | 6b5a528c | Florent Chuffart | "2"=c(1,0,1), |
951 | 55c1cdff | Florent Chuffart | "3"=c(1,1,0), |
952 | 6b5a528c | Florent Chuffart | c(1,1,1) |
953 | 5bfac5a3 | Florent Chuffart | ) |
954 | 6b5a528c | Florent Chuffart | } |
955 | 6b5a528c | Florent Chuffart | for (manip in manips) { |
956 | 6b5a528c | Florent Chuffart | tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) & all_samples$marker == manip), ] |
957 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[order(tmp_samples$strain), ] |
958 | 6b5a528c | Florent Chuffart | if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) { |
959 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,] |
960 | 6b5a528c | Florent Chuffart | } else { |
961 | 6b5a528c | Florent Chuffart | if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) { |
962 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,] |
963 | 6b5a528c | Florent Chuffart | } |
964 | 6b5a528c | Florent Chuffart | } |
965 | 5bfac5a3 | Florent Chuffart | design_manip = c(design_manip, rep(manip, length(tmp_samples$id))) |
966 | 5bfac5a3 | Florent Chuffart | for (strain in combi) { |
967 | 6b5a528c | Florent Chuffart | cols = apply(t(tmp_samples[ (tmp_samples$strain == strain & tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")}) |
968 | 6b5a528c | Florent Chuffart | design_strain = c(design_strain, rep(strain, length(cols))) |
969 | 6b5a528c | Florent Chuffart | design_rownames = c(design_rownames, cols) |
970 | 6b5a528c | Florent Chuffart | } |
971 | 6b5a528c | Florent Chuffart | } |
972 | 6b5a528c | Florent Chuffart | snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain) |
973 | 5bfac5a3 | Florent Chuffart | return(snep_design) |
974 | 6b5a528c | Florent Chuffart | } |
975 | 6b5a528c | Florent Chuffart | |
976 | 6b5a528c | Florent Chuffart | plot_dist_samples = function(# Plot the distribution of reads. |
977 | e5603c3f | Florent Chuffart | ### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution. |
978 | 6b5a528c | Florent Chuffart | strain, ##<< The strain to considere. |
979 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
980 | 6b5a528c | Florent Chuffart | res, ##<< Data |
981 | 6b5a528c | Florent Chuffart | all_samples, ##<< Global list of samples. |
982 | 6b5a528c | Florent Chuffart | NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot. |
983 | 5bfac5a3 | Florent Chuffart | ) { |
984 | 6b5a528c | Florent Chuffart | cols = apply(t(all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")}) |
985 | 6b5a528c | Florent Chuffart | snepCountTable = res[,cols] |
986 | 6b5a528c | Florent Chuffart | snepDesign = data.frame( |
987 | 6b5a528c | Florent Chuffart | row.names = cols, |
988 | 6b5a528c | Florent Chuffart | manip = rep(marker, length(cols)), |
989 | 6b5a528c | Florent Chuffart | strain = rep(strain, length(cols)) |
990 | 6b5a528c | Florent Chuffart | ) |
991 | 6b5a528c | Florent Chuffart | cdsFull = newCountDataSet(snepCountTable, snepDesign) |
992 | 6b5a528c | Florent Chuffart | sizeFactors = estimateSizeFactors(cdsFull) |
993 | 6b5a528c | Florent Chuffart | # print(sizeFactors[[1]]) |
994 | 6b5a528c | Florent Chuffart | sample_ids = all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id |
995 | 6b5a528c | Florent Chuffart | if (NEWPLOT) { |
996 | 5bfac5a3 | Florent Chuffart | plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker)) |
997 | 6b5a528c | Florent Chuffart | NEWPLOT = FALSE |
998 | 6b5a528c | Florent Chuffart | } |
999 | 6b5a528c | Florent Chuffart | for (it in 1:length(sample_ids)) { |
1000 | 6b5a528c | Florent Chuffart | sample_id = sample_ids[it] |
1001 | 6b5a528c | Florent Chuffart | lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it) |
1002 | 6b5a528c | Florent Chuffart | } |
1003 | 7164d3ac | Florent Chuffart | legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols) |
1004 | 6b5a528c | Florent Chuffart | } |
1005 | 6b5a528c | Florent Chuffart | |
1006 | e5603c3f | Florent Chuffart | analyse_design = function(# Launch DESeq methods. |
1007 | e5603c3f | Florent Chuffart | ### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models. |
1008 | e5603c3f | Florent Chuffart | snep_design, ##<< The design to consider. |
1009 | e5603c3f | Florent Chuffart | reads ##<< The data to consider. |
1010 | 6b5a528c | Florent Chuffart | ) { |
1011 | 6b5a528c | Florent Chuffart | snep_count_table = reads[, rownames(snep_design)] |
1012 | 6b5a528c | Florent Chuffart | cdsFull = newCountDataSet(snep_count_table, snep_design) |
1013 | 6b5a528c | Florent Chuffart | cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum") |
1014 | 6b5a528c | Florent Chuffart | fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain) |
1015 | 6b5a528c | Florent Chuffart | |
1016 | 6b5a528c | Florent Chuffart | cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum") |
1017 | 6b5a528c | Florent Chuffart | fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain) |
1018 | 6b5a528c | Florent Chuffart | |
1019 | 6b5a528c | Florent Chuffart | pvalsGLM = nbinomGLMTest( fit1, fit0 ) |
1020 | 5bfac5a3 | Florent Chuffart | return(list(fit1, fit0, snep_design, pvalsGLM)) |
1021 | 6b5a528c | Florent Chuffart | } |
1022 | 6b5a528c | Florent Chuffart | |
1023 | 6b5a528c | Florent Chuffart | |
1024 | 6b5a528c | Florent Chuffart | |
1025 | 6b5a528c | Florent Chuffart | |
1026 | 6b5a528c | Florent Chuffart | |
1027 | 6b5a528c | Florent Chuffart | |
1028 | 6b5a528c | Florent Chuffart | |
1029 | 6b5a528c | Florent Chuffart | |
1030 | 6b5a528c | Florent Chuffart | get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form. |
1031 | 55c1cdff | Florent Chuffart | ### This function uses |
1032 | 6b5a528c | Florent Chuffart | marker, ##<< The marker involved. |
1033 | 6b5a528c | Florent Chuffart | combi, ##<< The strain combination involved. |
1034 | 55c1cdff | Florent Chuffart | form, ##<< the nuc form involved. |
1035 | cc54c799 | Florent Chuffart | all_samples, ##<< Global list of samples. |
1036 | e5603c3f | Florent Chuffart | FDR = 0.0001, ## the specific False Discover Rate |
1037 | cc54c799 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
1038 | 6b5a528c | Florent Chuffart | ) { |
1039 | 6b5a528c | Florent Chuffart | # PRETREAT |
1040 | 4016229d | Florent Chuffart | snep_design = get_design(marker, combi, all_samples) |
1041 | cc54c799 | Florent Chuffart | reads = get_all_reads(marker, combi, form, config=config) |
1042 | 6b5a528c | Florent Chuffart | # RUN ANALYSE |
1043 | 4016229d | Florent Chuffart | tmp_analyse = analyse_design(snep_design, reads) |
1044 | 6b5a528c | Florent Chuffart | # RESULTS |
1045 | 6b5a528c | Florent Chuffart | fit1 = tmp_analyse[[1]] |
1046 | 6b5a528c | Florent Chuffart | fit0 = tmp_analyse[[2]] |
1047 | 6b5a528c | Florent Chuffart | k = names(fit1) |
1048 | 6b5a528c | Florent Chuffart | reads[[k[2]]] = signif(fit1[[k[2]]], 5) |
1049 | 6b5a528c | Florent Chuffart | reads[[k[3]]] = signif(fit1[[k[3]]], 5) |
1050 | 6b5a528c | Florent Chuffart | reads[[k[4]]] = signif(fit1[[k[4]]], 5) |
1051 | 6b5a528c | Florent Chuffart | reads$pvalsGLM = signif(tmp_analyse[[4]], 5) |
1052 | 6b5a528c | Florent Chuffart | snep_design = tmp_analyse[[3]] |
1053 | 4016229d | Florent Chuffart | # print(snep_design) |
1054 | e5603c3f | Florent Chuffart | thres = FDR(reads$pvalsGLM, FDR) |
1055 | 6b5a528c | Florent Chuffart | reads$snep_index = reads$pvalsGLM < thres |
1056 | 5badc2fd | Florent Chuffart | print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = "")) |
1057 | 6b5a528c | Florent Chuffart | return(reads) |
1058 | 5bfac5a3 | Florent Chuffart | }, ex=function(){ |
1059 | 6b5a528c | Florent Chuffart | marker = "H3K4me1" |
1060 | 55c1cdff | Florent Chuffart | combi = c("BY", "YJM") |
1061 | 4016229d | Florent Chuffart | form = "wpunr" # "wp" | "unr" | "wpunr" |
1062 | 6b5a528c | Florent Chuffart | # foo = get_sneps(marker, combi, form) |
1063 | 6b5a528c | Florent Chuffart | # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp") |
1064 | 6b5a528c | Florent Chuffart | }) |
1065 | 6b5a528c | Florent Chuffart | |
1066 | 6b5a528c | Florent Chuffart | |
1067 | 6b5a528c | Florent Chuffart | |
1068 | 6b5a528c | Florent Chuffart | |
1069 | 6b5a528c | Florent Chuffart | |
1070 | 6b5a528c | Florent Chuffart | |
1071 | 6b5a528c | Florent Chuffart | |
1072 | 6b5a528c | Florent Chuffart | |
1073 | 6b5a528c | Florent Chuffart | |
1074 | 6b5a528c | Florent Chuffart | |
1075 | 6b5a528c | Florent Chuffart | |
1076 | 6b5a528c | Florent Chuffart | |
1077 | 6b5a528c | Florent Chuffart | |
1078 | 6b5a528c | Florent Chuffart | |
1079 | 6b5a528c | Florent Chuffart | |
1080 | 6b5a528c | Florent Chuffart | |
1081 | 6b5a528c | Florent Chuffart | |
1082 | 6b5a528c | Florent Chuffart | |
1083 | 6b5a528c | Florent Chuffart | |
1084 | 6b5a528c | Florent Chuffart | |
1085 | 6b5a528c | Florent Chuffart | |
1086 | 6b5a528c | Florent Chuffart | |
1087 | 6b5a528c | Florent Chuffart | |
1088 | 6b5a528c | Florent Chuffart | |
1089 | 1d833b97 | Florent Chuffart | ROM2ARAB = function(# Roman to Arabic pair list. |
1090 | e5603c3f | Florent Chuffart | ### Utility to convert Roman numbers into Arabic numbers |
1091 | 1d833b97 | Florent Chuffart | ){list( |
1092 | 1d833b97 | Florent Chuffart | "I" = 1, |
1093 | 1d833b97 | Florent Chuffart | "II" = 2, |
1094 | 1d833b97 | Florent Chuffart | "III" = 3, |
1095 | 1d833b97 | Florent Chuffart | "IV" = 4, |
1096 | 1d833b97 | Florent Chuffart | "V" = 5, |
1097 | 1d833b97 | Florent Chuffart | "VI" = 6, |
1098 | 1d833b97 | Florent Chuffart | "VII" = 7, |
1099 | 1d833b97 | Florent Chuffart | "VIII" = 8, |
1100 | 1d833b97 | Florent Chuffart | "IX" = 9, |
1101 | 1d833b97 | Florent Chuffart | "X" = 10, |
1102 | 1d833b97 | Florent Chuffart | "XI" = 11, |
1103 | 1d833b97 | Florent Chuffart | "XII" = 12, |
1104 | 1d833b97 | Florent Chuffart | "XIII" = 13, |
1105 | 1d833b97 | Florent Chuffart | "XIV" = 14, |
1106 | 1d833b97 | Florent Chuffart | "XV" = 15, |
1107 | 1d833b97 | Florent Chuffart | "XVI" = 16, |
1108 | 1d833b97 | Florent Chuffart | "XVII" = 17, |
1109 | 1d833b97 | Florent Chuffart | "XVIII" = 18, |
1110 | 1d833b97 | Florent Chuffart | "XIX" = 19, |
1111 | 1d833b97 | Florent Chuffart | "XX" = 20 |
1112 | 1d833b97 | Florent Chuffart | )} |
1113 | 1d833b97 | Florent Chuffart | |
1114 | 1d833b97 | Florent Chuffart | switch_pairlist = structure(function(# Switch a pairlist |
1115 | 1d833b97 | Florent Chuffart | ### Take a pairlist key:value and return the switched pairlist value:key. |
1116 | 55c1cdff | Florent Chuffart | l ##<< The pairlist to switch. |
1117 | 1d833b97 | Florent Chuffart | ) { |
1118 | 1d833b97 | Florent Chuffart | ret = list() |
1119 | 1d833b97 | Florent Chuffart | for (name in names(l)) { |
1120 | 1d833b97 | Florent Chuffart | ret[[as.character(l[[name]])]] = name |
1121 | 1d833b97 | Florent Chuffart | } |
1122 | 1d833b97 | Florent Chuffart | ret |
1123 | 1d833b97 | Florent Chuffart | ### The switched pairlist. |
1124 | 1d833b97 | Florent Chuffart | }, ex=function(){ |
1125 | 1d833b97 | Florent Chuffart | l = list(key1 = "value1", key2 = "value2") |
1126 | 1d833b97 | Florent Chuffart | print(switch_pairlist(l)) |
1127 | 1d833b97 | Florent Chuffart | }) |
1128 | 1d833b97 | Florent Chuffart | |
1129 | 1d833b97 | Florent Chuffart | ARAB2ROM = function(# Arabic to Roman pair list. |
1130 | e5603c3f | Florent Chuffart | ### Utility to convert Arabic numbers to Roman numbers |
1131 | 1d833b97 | Florent Chuffart | ){switch_pairlist(ROM2ARAB())} |
1132 | 1d833b97 | Florent Chuffart | |
1133 | 1d833b97 | Florent Chuffart | |
1134 | 1d833b97 | Florent Chuffart | |
1135 | 1d833b97 | Florent Chuffart | |
1136 | 5bfac5a3 | Florent Chuffart | c2c_extraction = function(# Extract a sub part of the corresponding c2c file |
1137 | e5603c3f | Florent Chuffart | ### This fonction allows to access to a specific part of the c2c file. |
1138 | 5bfac5a3 | Florent Chuffart | strain1, ##<< the key strain |
1139 | 5bfac5a3 | Florent Chuffart | strain2, ##<< the target strain |
1140 | e5603c3f | Florent Chuffart | chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value |
1141 | e5603c3f | Florent Chuffart | lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound |
1142 | e5603c3f | Florent Chuffart | upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound |
1143 | 5bfac5a3 | Florent Chuffart | config=NULL##<< GLOBAL config variable |
1144 | 1d833b97 | Florent Chuffart | ) { |
1145 | 1d833b97 | Florent Chuffart | reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
1146 | 1d833b97 | Florent Chuffart | # Launch c2c file |
1147 | 1d833b97 | Florent Chuffart | if (reverse) { |
1148 | 5bfac5a3 | Florent Chuffart | c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]] |
1149 | 1d833b97 | Florent Chuffart | } else { |
1150 | 5bfac5a3 | Florent Chuffart | c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]] |
1151 | 1d833b97 | Florent Chuffart | } |
1152 | 5bfac5a3 | Florent Chuffart | c2c = get_content(c2c_filename, "table", stringsAsFactors=FALSE) |
1153 | 5bfac5a3 | Florent Chuffart | # Filtering unagapped |
1154 | 1d833b97 | Florent Chuffart | c2c = c2c[c2c$V6=="-",] |
1155 | 1d833b97 | Florent Chuffart | # Reverse |
1156 | 1d833b97 | Florent Chuffart | if (reverse) { |
1157 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V1 |
1158 | 1d833b97 | Florent Chuffart | c2c$V1 = c2c$V7 |
1159 | 1d833b97 | Florent Chuffart | c2c$V7 = tmp_col |
1160 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V2 |
1161 | 1d833b97 | Florent Chuffart | c2c$V2 = c2c$V9 |
1162 | 1d833b97 | Florent Chuffart | c2c$V9 = tmp_col |
1163 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V3 |
1164 | 1d833b97 | Florent Chuffart | c2c$V3 = c2c$V10 |
1165 | 1d833b97 | Florent Chuffart | c2c$V10 = tmp_col |
1166 | 1d833b97 | Florent Chuffart | } |
1167 | 5bfac5a3 | Florent Chuffart | if (!is.null(chr)) { |
1168 | 5bfac5a3 | Florent Chuffart | if (strain1 == "BY") { |
1169 | 5bfac5a3 | Florent Chuffart | chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="") |
1170 | 5bfac5a3 | Florent Chuffart | } else if (strain1 == "RM") { |
1171 | 5bfac5a3 | Florent Chuffart | chro_1 = paste("supercontig_1.",chr,sep="") |
1172 | 5bfac5a3 | Florent Chuffart | } else if (strain1 == "YJM") { |
1173 | 5bfac5a3 | Florent Chuffart | chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]] |
1174 | 5bfac5a3 | Florent Chuffart | } |
1175 | 5bfac5a3 | Florent Chuffart | c2c = c2c[c2c$V1 == chro_1,] |
1176 | 5bfac5a3 | Florent Chuffart | if (!is.null(lower_bound)) { |
1177 | 5bfac5a3 | Florent Chuffart | if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound} |
1178 | 5bfac5a3 | Florent Chuffart | if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound} |
1179 | 5bfac5a3 | Florent Chuffart | c2c = c2c[c2c$V2 - c2c$V3 != 0,] |
1180 | 5bfac5a3 | Florent Chuffart | } |
1181 | 5bfac5a3 | Florent Chuffart | if (!is.null(upper_bound)) { |
1182 | 5bfac5a3 | Florent Chuffart | if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound} |
1183 | 5bfac5a3 | Florent Chuffart | if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound} |
1184 | 5bfac5a3 | Florent Chuffart | c2c = c2c[c2c$V2 - c2c$V3 != 0,] |
1185 | 5bfac5a3 | Florent Chuffart | } |
1186 | 5bfac5a3 | Florent Chuffart | } |
1187 | 5bfac5a3 | Florent Chuffart | return(c2c) |
1188 | 5bfac5a3 | Florent Chuffart | # It retruns the appropriate c2c file part. |
1189 | 5bfac5a3 | Florent Chuffart | } |
1190 | 5bfac5a3 | Florent Chuffart | |
1191 | 4016229d | Florent Chuffart | translate_cur = structure(function(# Translate coords of a genome region. |
1192 | 5bfac5a3 | Florent Chuffart | ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example. |
1193 | 5bfac5a3 | Florent Chuffart | roi, ##<< Original genome region of interest. |
1194 | 5bfac5a3 | Florent Chuffart | strain2, ##<< The strain in wich you want the genome region of interest. |
1195 | 5bfac5a3 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
1196 | 4016229d | Florent Chuffart | big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed. |
1197 | 5bfac5a3 | Florent Chuffart | ) { |
1198 | d973538c | Florent Chuffart | strain1 = roi$strain_ref |
1199 | d973538c | Florent Chuffart | # Do something or nothing? |
1200 | d973538c | Florent Chuffart | if (is.null(config$NEUTRAL_TRANSLATE_CUR)) { |
1201 | d973538c | Florent Chuffart | config$NEUTRAL_TRANSLATE_CUR = FALSE |
1202 | d973538c | Florent Chuffart | } |
1203 | d973538c | Florent Chuffart | if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) { |
1204 | d973538c | Florent Chuffart | roi$strain_ref = strain2 |
1205 | d973538c | Florent Chuffart | roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) |
1206 | 5bfac5a3 | Florent Chuffart | return(roi) |
1207 | 5bfac5a3 | Florent Chuffart | } |
1208 | d973538c | Florent Chuffart | |
1209 | 5bfac5a3 | Florent Chuffart | # Extract c2c file |
1210 | 4016229d | Florent Chuffart | if (!is.null(big_cur)) { |
1211 | 4016229d | Florent Chuffart | # Dealing with big_cur |
1212 | 4016229d | Florent Chuffart | if (roi$strain_ref != big_cur$strain_ref) { |
1213 | 4016229d | Florent Chuffart | big_cur = translate_cur(big_cur, roi$strain_ref, config=config) |
1214 | 62d69d53 | Florent Chuffart | } |
1215 | 4016229d | Florent Chuffart | if (big_cur$end < big_cur$begin) { |
1216 | 4016229d | Florent Chuffart | tmp_var = big_cur$begin |
1217 | 4016229d | Florent Chuffart | big_cur$begin = big_cur$end |
1218 | 4016229d | Florent Chuffart | big_cur$end = tmp_var |
1219 | 4016229d | Florent Chuffart | big_cur$length = big_cur$end - big_cur$begin + 1 |
1220 | 55c1cdff | Florent Chuffart | } |
1221 | 4016229d | Florent Chuffart | if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) { |
1222 | 4016229d | Florent Chuffart | print("WARNING! Trying to translate a roi not included in a big_cur.") |
1223 | ded736c8 | Florent Chuffart | return(NULL) |
1224 | ded736c8 | Florent Chuffart | } |
1225 | 4016229d | Florent Chuffart | c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config) |
1226 | 5bfac5a3 | Florent Chuffart | } else { |
1227 | 4016229d | Florent Chuffart | # No big_cur |
1228 | 5bfac5a3 | Florent Chuffart | c2c = c2c_extraction(strain1, strain2, roi$chr, config=config) |
1229 | 5bfac5a3 | Florent Chuffart | } |
1230 | 5bfac5a3 | Florent Chuffart | |
1231 | 1d833b97 | Florent Chuffart | # Convert initial roi$chr into c2c format |
1232 | 5bfac5a3 | Florent Chuffart | reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
1233 | 1d833b97 | Florent Chuffart | begin_1 = roi$begin |
1234 | 1d833b97 | Florent Chuffart | end_1 = roi$end |
1235 | 5bfac5a3 | Florent Chuffart | if (reverse) { |
1236 | 5bfac5a3 | Florent Chuffart | tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),] |
1237 | 5bfac5a3 | Florent Chuffart | tmptransfostop = c2c[(c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1),] |
1238 | 1d833b97 | Florent Chuffart | } else { |
1239 | 5bfac5a3 | Florent Chuffart | tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
1240 | 5bfac5a3 | Florent Chuffart | tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,] |
1241 | 1d833b97 | Florent Chuffart | } |
1242 | 5bfac5a3 | Florent Chuffart | |
1243 | 5bfac5a3 | Florent Chuffart | # Never happend conditions ... |
1244 | 1d833b97 | Florent Chuffart | { |
1245 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) == 0) { |
1246 | 1d833b97 | Florent Chuffart | # begin_1 is between to lines: shift begin_1 to the start of 2nd line. |
1247 | 8c87ee64 | Florent Chuffart | if (sum(c2c$V3 >= c2c$V2) != 0) { |
1248 | 5bfac5a3 | Florent Chuffart | tmp_c2c = c2c[c2c$V2>=begin_1,] |
1249 | f56c14b2 | Florent Chuffart | begin_1 = min(tmp_c2c$V2) |
1250 | 54cc8283 | Florent Chuffart | } else { |
1251 | 5bfac5a3 | Florent Chuffart | tmp_c2c = c2c[c2c$V3>=begin_1,] |
1252 | f56c14b2 | Florent Chuffart | begin_1 = min(tmp_c2c$V3) |
1253 | 54cc8283 | Florent Chuffart | } |
1254 | 1d833b97 | Florent Chuffart | if (reverse) { |
1255 | 5bfac5a3 | Florent Chuffart | tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),] |
1256 | 1d833b97 | Florent Chuffart | } else { |
1257 | 5bfac5a3 | Florent Chuffart | tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
1258 | 1d833b97 | Florent Chuffart | } |
1259 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) == 0) { |
1260 | 4016229d | Florent Chuffart | if (!is.null(big_cur)) { |
1261 | 1d833b97 | Florent Chuffart | return(NULL) |
1262 | 4016229d | Florent Chuffart | tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,] |
1263 | 55c1cdff | Florent Chuffart | } else { |
1264 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1265 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1266 | 5bfac5a3 | Florent Chuffart | stop("Never happend condition 1.") |
1267 | 1d833b97 | Florent Chuffart | } |
1268 | 1d833b97 | Florent Chuffart | } |
1269 | 55c1cdff | Florent Chuffart | } |
1270 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) == 0) { |
1271 | 1d833b97 | Florent Chuffart | # end_1 is between to lines: shift end_1 to the end of 2nd line. |
1272 | 8c87ee64 | Florent Chuffart | if (sum(c2c$V3 >= c2c$V2) != 0) { |
1273 | 5bfac5a3 | Florent Chuffart | tmp_c2c = c2c[c2c$V3<=end_1,] |
1274 | f56c14b2 | Florent Chuffart | end_1 = max(tmp_c2c$V3) |
1275 | 54cc8283 | Florent Chuffart | } else { |
1276 | 5bfac5a3 | Florent Chuffart | tmp_c2c = c2c[c2c$V2<=end_1,] |
1277 | f56c14b2 | Florent Chuffart | end_1 = max(tmp_c2c$V2) |
1278 | 54cc8283 | Florent Chuffart | } |
1279 | 1d833b97 | Florent Chuffart | if (reverse) { |
1280 | 5bfac5a3 | Florent Chuffart | tmptransfostop = c2c[(c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1),] |
1281 | 1d833b97 | Florent Chuffart | } else { |
1282 | 5bfac5a3 | Florent Chuffart | tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,] |
1283 | 1d833b97 | Florent Chuffart | } |
1284 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) == 0) { |
1285 | 4016229d | Florent Chuffart | if (!is.null(big_cur)) { |
1286 | 1d833b97 | Florent Chuffart | return(NULL) |
1287 | 4016229d | Florent Chuffart | tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,] |
1288 | 1d833b97 | Florent Chuffart | } else { |
1289 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1290 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1291 | 1d833b97 | Florent Chuffart | stop("Never happend condition 2.") |
1292 | 55c1cdff | Florent Chuffart | } |
1293 | 55c1cdff | Florent Chuffart | } |
1294 | 1d833b97 | Florent Chuffart | } |
1295 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) != 1) { |
1296 | 1d833b97 | Florent Chuffart | # print("many start") |
1297 | 5bfac5a3 | Florent Chuffart | tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,] |
1298 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) != 1) { |
1299 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1300 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1301 | 1d833b97 | Florent Chuffart | stop("Never happend condition 3.") |
1302 | 1d833b97 | Florent Chuffart | } |
1303 | 1d833b97 | Florent Chuffart | } |
1304 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) != 1) { |
1305 | 1d833b97 | Florent Chuffart | # print("many stop") |
1306 | 5bfac5a3 | Florent Chuffart | tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,] |
1307 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) != 1) { |
1308 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1309 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1310 | 1d833b97 | Florent Chuffart | stop("Never happend condition 4.") |
1311 | 1d833b97 | Florent Chuffart | } |
1312 | 5bfac5a3 | Florent Chuffart | } |
1313 | 1d833b97 | Florent Chuffart | if (tmptransfostart$V7 != tmptransfostop$V7) { |
1314 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1315 | 5bfac5a3 | Florent Chuffart | print(tmptransfostop) |
1316 | 1d833b97 | Florent Chuffart | stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.") |
1317 | 1d833b97 | Florent Chuffart | } |
1318 | 55c1cdff | Florent Chuffart | } |
1319 | 5bfac5a3 | Florent Chuffart | |
1320 | 1d833b97 | Florent Chuffart | # Deal with strand |
1321 | 1d833b97 | Florent Chuffart | if (tmptransfostart$V8 == 1) { |
1322 | 1d833b97 | Florent Chuffart | begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2) |
1323 | 1d833b97 | Florent Chuffart | end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2) |
1324 | 1d833b97 | Florent Chuffart | } else { |
1325 | 1d833b97 | Florent Chuffart | begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2) |
1326 | 1d833b97 | Florent Chuffart | end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2) |
1327 | 1d833b97 | Florent Chuffart | } |
1328 | 1d833b97 | Florent Chuffart | # Build returned roi |
1329 | 1d833b97 | Florent Chuffart | roi$strain_ref = strain2 |
1330 | 1d833b97 | Florent Chuffart | if (roi$strain_ref == "BY") { |
1331 | 1d833b97 | Florent Chuffart | roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]] |
1332 | 1d833b97 | Florent Chuffart | } else { |
1333 | 1d833b97 | Florent Chuffart | roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]] |
1334 | 1d833b97 | Florent Chuffart | } |
1335 | 1d833b97 | Florent Chuffart | roi$begin = begin_2 |
1336 | 1d833b97 | Florent Chuffart | roi$end = end_2 |
1337 | 1d833b97 | Florent Chuffart | if (sign(roi$end - roi$begin) == 0) { |
1338 | 5bfac5a3 | Florent Chuffart | roi$length = 1 |
1339 | 5bfac5a3 | Florent Chuffart | } else { |
1340 | 5bfac5a3 | Florent Chuffart | roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
1341 | 1d833b97 | Florent Chuffart | } |
1342 | 55c1cdff | Florent Chuffart | return(roi) |
1343 | 1d833b97 | Florent Chuffart | }, ex=function(){ |
1344 | 4016229d | Florent Chuffart | # Define new translate_cur function... |
1345 | 4016229d | Florent Chuffart | translate_cur = function(roi, strain2, config) { |
1346 | 1d833b97 | Florent Chuffart | strain1 = roi$strain_ref |
1347 | 1d833b97 | Florent Chuffart | if (strain1 == strain2) { |
1348 | 1d833b97 | Florent Chuffart | return(roi) |
1349 | 1d833b97 | Florent Chuffart | } else { |
1350 | 4016229d | Florent Chuffart | stop("Here is my new translate_cur function...") |
1351 | 5bfac5a3 | Florent Chuffart | } |
1352 | 1d833b97 | Florent Chuffart | } |
1353 | 1d833b97 | Florent Chuffart | # Binding it by uncomment follwing lines. |
1354 | 4016229d | Florent Chuffart | # unlockBinding("translate_cur", as.environment("package:nm")) |
1355 | 4016229d | Florent Chuffart | # unlockBinding("translate_cur", getNamespace("nm")) |
1356 | 4016229d | Florent Chuffart | # assign("translate_cur", translate_cur, "package:nm") |
1357 | 4016229d | Florent Chuffart | # assign("translate_cur", translate_cur, getNamespace("nm")) |
1358 | 4016229d | Florent Chuffart | # lockBinding("translate_cur", getNamespace("nm")) |
1359 | 4016229d | Florent Chuffart | # lockBinding("translate_cur", as.environment("package:nm")) |
1360 | 1d833b97 | Florent Chuffart | }) |
1361 | 1d833b97 | Florent Chuffart | |
1362 | 1d833b97 | Florent Chuffart | |
1363 | 5bfac5a3 | Florent Chuffart | compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR) |
1364 | 5bfac5a3 | Florent Chuffart | ### CURs are regions that can be aligned between the genomes |
1365 | 5bfac5a3 | Florent Chuffart | diff_allowed = 30, ##<< the maximum indel width allowe din a CUR |
1366 | 5bfac5a3 | Florent Chuffart | min_cur_width = 4000, ##<< The minimum width of a CUR |
1367 | 5bfac5a3 | Florent Chuffart | config = NULL ##<< GLOBAL config variable |
1368 | 5bfac5a3 | Florent Chuffart | ) { |
1369 | 1d833b97 | Florent Chuffart | |
1370 | 5bfac5a3 | Florent Chuffart | check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) { |
1371 | 5bfac5a3 | Florent Chuffart | c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config) |
1372 | 5bfac5a3 | Florent Chuffart | check_homogeneity(c2c) |
1373 | 5bfac5a3 | Florent Chuffart | if (length(c2c[,1]) == 0 ) { |
1374 | 5bfac5a3 | Florent Chuffart | stop("WARNING! checking overlapping for a region corresponding to an empty c2c.") |
1375 | 5bfac5a3 | Florent Chuffart | } else { |
1376 | 5bfac5a3 | Florent Chuffart | lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)}) |
1377 | 5bfac5a3 | Florent Chuffart | upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)}) |
1378 | 5bfac5a3 | Florent Chuffart | tmp_index = order(lower_bounds) |
1379 | 5bfac5a3 | Florent Chuffart | lower_bounds = lower_bounds[tmp_index] |
1380 | 5bfac5a3 | Florent Chuffart | upper_bounds = upper_bounds[tmp_index] |
1381 | 5bfac5a3 | Florent Chuffart | tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)] |
1382 | 5bfac5a3 | Florent Chuffart | ov_index = which(tmp_diff < 0) |
1383 | 5bfac5a3 | Florent Chuffart | if(length(ov_index < 0) !=0 ) { |
1384 | 5bfac5a3 | Florent Chuffart | ov_index = ov_index[1] |
1385 | 5bfac5a3 | Florent Chuffart | print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep="")) |
1386 | 5bfac5a3 | Florent Chuffart | c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),] |
1387 | 5bfac5a3 | Florent Chuffart | print(c2c_corrupted) |
1388 | 5bfac5a3 | Florent Chuffart | return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1)) |
1389 | 5bfac5a3 | Florent Chuffart | } |
1390 | 5bfac5a3 | Florent Chuffart | return(NULL) |
1391 | 5bfac5a3 | Florent Chuffart | } |
1392 | 5bfac5a3 | Florent Chuffart | } |
1393 | 1d833b97 | Florent Chuffart | |
1394 | 5bfac5a3 | Florent Chuffart | check_homogeneity = function(sub_c2c) { |
1395 | 5bfac5a3 | Florent Chuffart | tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3) |
1396 | 5bfac5a3 | Florent Chuffart | tmp_signs = tmp_signs[tmp_signs != 0] |
1397 | 5bfac5a3 | Florent Chuffart | if (sum(tmp_signs[1] != tmp_signs)) { |
1398 | 5bfac5a3 | Florent Chuffart | print(paste("*************** ERROR, non homogenous region (sign)! ********************")) |
1399 | 5bfac5a3 | Florent Chuffart | print(tmp_signs) |
1400 | 5bfac5a3 | Florent Chuffart | } |
1401 | 5bfac5a3 | Florent Chuffart | tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10) |
1402 | 5bfac5a3 | Florent Chuffart | tmp_signs2 = tmp_signs2[tmp_signs2 != 0] |
1403 | 5bfac5a3 | Florent Chuffart | if (sum(tmp_signs2[1] != tmp_signs2)) { |
1404 | 5bfac5a3 | Florent Chuffart | print(paste("*************** ERROR, non homogenous region (sign2)! ********************")) |
1405 | 5bfac5a3 | Florent Chuffart | print(tmp_signs2) |
1406 | 5bfac5a3 | Florent Chuffart | } |
1407 | 5bfac5a3 | Florent Chuffart | if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) { |
1408 | 5bfac5a3 | Florent Chuffart | print("*************** ERROR, non homogenous region chrs or V8! ********************") |
1409 | 5bfac5a3 | Florent Chuffart | } |
1410 | 5bfac5a3 | Florent Chuffart | } |
1411 | 1d833b97 | Florent Chuffart | |
1412 | 5bfac5a3 | Florent Chuffart | test_and_squeeze_rois = function(foo, config=NULL) { |
1413 | 5bfac5a3 | Florent Chuffart | is_it_ok = function(list1, list2) { |
1414 | 5bfac5a3 | Florent Chuffart | bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length)) |
1415 | 5bfac5a3 | Florent Chuffart | ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0 |
1416 | 5bfac5a3 | Florent Chuffart | if (!ok) { |
1417 | 5bfac5a3 | Florent Chuffart | print(bar[bar[,3] != 0 | bar[,6] != 0, ]) |
1418 | 5bfac5a3 | Florent Chuffart | } |
1419 | 5bfac5a3 | Florent Chuffart | return (ok) |
1420 | 5bfac5a3 | Florent Chuffart | } |
1421 | 5bfac5a3 | Florent Chuffart | squeeze_rois = function(list1, list2) { |
1422 | 5bfac5a3 | Florent Chuffart | rois = apply(t(1:nrow(list1)), 2, function(i){ |
1423 | 5bfac5a3 | Florent Chuffart | roi = list1[i,] |
1424 | 5bfac5a3 | Florent Chuffart | roi2 = list2[i,] |
1425 | 5bfac5a3 | Florent Chuffart | roi$begin = max(roi$begin, roi2$begin) |
1426 | 5bfac5a3 | Florent Chuffart | roi$end = min(roi$end, roi2$end) |
1427 | 5bfac5a3 | Florent Chuffart | roi$length = roi$end - roi$begin + 1 |
1428 | 5bfac5a3 | Florent Chuffart | return(roi) |
1429 | 5bfac5a3 | Florent Chuffart | }) |
1430 | 5bfac5a3 | Florent Chuffart | return(do.call(rbind, rois)) |
1431 | 5bfac5a3 | Florent Chuffart | } |
1432 | 5bfac5a3 | Florent Chuffart | # foo_orig = compute_inter_all_strain_curs2(config=config) |
1433 | 5bfac5a3 | Florent Chuffart | # foo = foo_orig |
1434 | 5bfac5a3 | Florent Chuffart | STOP = FALSE |
1435 | 5bfac5a3 | Florent Chuffart | nb_round = 0 |
1436 | 5bfac5a3 | Florent Chuffart | while(!STOP) { |
1437 | 5bfac5a3 | Florent Chuffart | nb_round = nb_round + 1 |
1438 | 5bfac5a3 | Florent Chuffart | print(paste("2-2 round #", nb_round, sep="")) |
1439 | 4016229d | Florent Chuffart | fooby = translate_curs(foo, "BY", config=config) |
1440 | 4016229d | Florent Chuffart | fooyjm = translate_curs(foo, "YJM", config=config) |
1441 | 4016229d | Florent Chuffart | fooyjmby = translate_curs(fooyjm, "BY", config=config) |
1442 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(fooby, fooyjmby)) { |
1443 | 5bfac5a3 | Florent Chuffart | print("case 1") |
1444 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(fooby, fooyjmby) |
1445 | 5bfac5a3 | Florent Chuffart | next |
1446 | 5bfac5a3 | Florent Chuffart | } |
1447 | 4016229d | Florent Chuffart | foorm = translate_curs(foo, "RM", config=config) |
1448 | 4016229d | Florent Chuffart | foormby = translate_curs(foorm, "BY", config=config) |
1449 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(fooby, foormby)) { |
1450 | 5bfac5a3 | Florent Chuffart | print("case 2") |
1451 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(fooby, foormby) |
1452 | 5bfac5a3 | Florent Chuffart | next |
1453 | 5bfac5a3 | Florent Chuffart | } |
1454 | 4016229d | Florent Chuffart | fooyjmrm = translate_curs(fooyjm, "RM", config=config) |
1455 | 4016229d | Florent Chuffart | fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config) |
1456 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(fooyjm, fooyjmrmyjm)) { |
1457 | 5bfac5a3 | Florent Chuffart | print("case 3") |
1458 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(fooyjm, fooyjmrmyjm) |
1459 | 5bfac5a3 | Florent Chuffart | next |
1460 | 5bfac5a3 | Florent Chuffart | } |
1461 | 4016229d | Florent Chuffart | foormyjm = translate_curs(foorm, "YJM", config=config) |
1462 | 4016229d | Florent Chuffart | foormyjmrm = translate_curs(foormyjm, "RM", config=config) |
1463 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(foorm, foormyjmrm)) { |
1464 | 5bfac5a3 | Florent Chuffart | print("case 4") |
1465 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(foorm, foormyjmrm) |
1466 | 5bfac5a3 | Florent Chuffart | next |
1467 | 5bfac5a3 | Florent Chuffart | } |
1468 | 4016229d | Florent Chuffart | foo = translate_curs(foo, "BY", config=config) |
1469 | 5bfac5a3 | Florent Chuffart | STOP = TRUE |
1470 | 5bfac5a3 | Florent Chuffart | } |
1471 | 5bfac5a3 | Florent Chuffart | STOP = FALSE |
1472 | 5bfac5a3 | Florent Chuffart | nb_round = 0 |
1473 | 5bfac5a3 | Florent Chuffart | while(!STOP) { |
1474 | 5bfac5a3 | Florent Chuffart | nb_round = nb_round + 1 |
1475 | 5bfac5a3 | Florent Chuffart | print(paste("3-3 round #", nb_round, sep="")) |
1476 | 4016229d | Florent Chuffart | fooby = translate_curs(foo, "BY", config=config) |
1477 | 4016229d | Florent Chuffart | foobyrm = translate_curs(fooby, "RM", config=config) |
1478 | 4016229d | Florent Chuffart | foobyrmyjm = translate_curs(foobyrm, "YJM", config=config) |
1479 | 4016229d | Florent Chuffart | foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config) |
1480 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(fooby, foobyrmyjmby)) { |
1481 | 5bfac5a3 | Florent Chuffart | print("case 1") |
1482 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(fooby, foobyrmyjmby) |
1483 | 5bfac5a3 | Florent Chuffart | } |
1484 | 4016229d | Florent Chuffart | fooby = translate_curs(foo, "BY", config=config) |
1485 | 4016229d | Florent Chuffart | foobyyjm = translate_curs(fooby, "YJM", config=config) |
1486 | 4016229d | Florent Chuffart | foobyyjmrm = translate_curs(foobyyjm, "RM", config=config) |
1487 | 4016229d | Florent Chuffart | foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config) |
1488 | 5bfac5a3 | Florent Chuffart | if (!is_it_ok(fooby, foobyyjmrmby)) { |
1489 | 5bfac5a3 | Florent Chuffart | print("case 2") |
1490 | 5bfac5a3 | Florent Chuffart | foo = squeeze_rois(fooby, foobyyjmrmby) |
1491 | 5bfac5a3 | Florent Chuffart | next |
1492 | 5bfac5a3 | Florent Chuffart | } |
1493 | 4016229d | Florent Chuffart | foo = translate_curs(foo, "BY", config=config) |
1494 | 5bfac5a3 | Florent Chuffart | STOP = TRUE |
1495 | 5bfac5a3 | Florent Chuffart | } |
1496 | 5bfac5a3 | Florent Chuffart | print("end") |
1497 | 5bfac5a3 | Florent Chuffart | return(foo) |
1498 | 5bfac5a3 | Florent Chuffart | } |
1499 | 1d833b97 | Florent Chuffart | |
1500 | 5bfac5a3 | Florent Chuffart | get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) { |
1501 | 5bfac5a3 | Florent Chuffart | c2c = c2c_extraction(strain1, strain2, config=config) |
1502 | 5bfac5a3 | Florent Chuffart | # computing diffs |
1503 | 55c1cdff | Florent Chuffart | diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] |
1504 | 55c1cdff | Florent Chuffart | diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] |
1505 | 1d833b97 | Florent Chuffart | # Filtering |
1506 | 1d833b97 | Florent Chuffart | indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed) |
1507 | 1d833b97 | Florent Chuffart | indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1)) |
1508 | 5bfac5a3 | Florent Chuffart | rois = apply(t(1:length(indexes_start)), 2, function(i) { |
1509 | 5bfac5a3 | Florent Chuffart | if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start))) |
1510 | 5bfac5a3 | Florent Chuffart | returned_rois = NULL |
1511 | 1d833b97 | Florent Chuffart | start = indexes_start[i] |
1512 | 1d833b97 | Florent Chuffart | stop = indexes_stop[i] |
1513 | 1d833b97 | Florent Chuffart | sub_c2c = c2c[start:stop,] |
1514 | 5bfac5a3 | Florent Chuffart | check_homogeneity(sub_c2c) |
1515 | 1d833b97 | Florent Chuffart | if (strain1 == "BY") { |
1516 | 1d833b97 | Florent Chuffart | chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]] |
1517 | 5bfac5a3 | Florent Chuffart | } else { |
1518 | 1d833b97 | Florent Chuffart | chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]] |
1519 | 1d833b97 | Florent Chuffart | } |
1520 | 5bfac5a3 | Florent Chuffart | roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1) |
1521 | 5bfac5a3 | Florent Chuffart | roi$length = roi$end - roi$begin + 1 |
1522 | 1d833b97 | Florent Chuffart | if (roi$length >= min_cur_width) { |
1523 | 5bfac5a3 | Florent Chuffart | lower_bound = roi$begin |
1524 | 5bfac5a3 | Florent Chuffart | upper_bound = roi$end |
1525 | 5bfac5a3 | Florent Chuffart | check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config) |
1526 | 5bfac5a3 | Florent Chuffart | while(!is.null(check)) { |
1527 | 5bfac5a3 | Florent Chuffart | # print(check) |
1528 | 5bfac5a3 | Florent Chuffart | roi1 = roi |
1529 | 5bfac5a3 | Florent Chuffart | roi1$end = check[[1]] |
1530 | 5bfac5a3 | Florent Chuffart | roi1$length = roi1$end - roi1$begin + 1 |
1531 | 5bfac5a3 | Florent Chuffart | if (roi1$length >= min_cur_width) { |
1532 | 5bfac5a3 | Florent Chuffart | returned_rois = dfadd(returned_rois,roi1) |
1533 | 5bfac5a3 | Florent Chuffart | } |
1534 | 5bfac5a3 | Florent Chuffart | roi$begin = check[[2]] |
1535 | 5bfac5a3 | Florent Chuffart | roi$length = roi$end - roi$begin + 1 |
1536 | 5bfac5a3 | Florent Chuffart | if (roi$length >= min_cur_width) { |
1537 | 5bfac5a3 | Florent Chuffart | lower_bound = min(roi$begin, roi$end) |
1538 | 5bfac5a3 | Florent Chuffart | upper_bound = max(roi$begin, roi$end) |
1539 | 5bfac5a3 | Florent Chuffart | check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config) |
1540 | 5bfac5a3 | Florent Chuffart | } else { |
1541 | 5bfac5a3 | Florent Chuffart | check = NULL |
1542 | 5bfac5a3 | Florent Chuffart | roi = NULL |
1543 | 5bfac5a3 | Florent Chuffart | } |
1544 | 5bfac5a3 | Florent Chuffart | } |
1545 | 5bfac5a3 | Florent Chuffart | returned_rois = dfadd(returned_rois,roi) |
1546 | 1d833b97 | Florent Chuffart | } |
1547 | 5bfac5a3 | Florent Chuffart | }) |
1548 | 5bfac5a3 | Florent Chuffart | rois = do.call(rbind,rois) |
1549 | 5bfac5a3 | Florent Chuffart | rois = rois[order(as.numeric(rois$chr), rois$begin), ] |
1550 | 1d833b97 | Florent Chuffart | return(rois) |
1551 | 1d833b97 | Florent Chuffart | } |
1552 | 1d833b97 | Florent Chuffart | |
1553 | 4016229d | Florent Chuffart | translate_curs = function(rois, target_strain, config) { |
1554 | 5bfac5a3 | Florent Chuffart | tr_rois = apply(t(1:nrow(rois)), 2, function(i){ |
1555 | 5bfac5a3 | Florent Chuffart | roi = rois[i,] |
1556 | 4016229d | Florent Chuffart | tr_roi = translate_cur(roi, target_strain, config=config) |
1557 | 5bfac5a3 | Florent Chuffart | tmp_begin = min(tr_roi$begin, tr_roi$end) |
1558 | 5bfac5a3 | Florent Chuffart | tmp_end = max(tr_roi$begin, tr_roi$end) |
1559 | 5bfac5a3 | Florent Chuffart | tr_roi$begin = tmp_begin |
1560 | 5bfac5a3 | Florent Chuffart | tr_roi$end = tmp_end |
1561 | 5bfac5a3 | Florent Chuffart | tr_roi$length = tr_roi$end - tr_roi$begin + 1 |
1562 | 5bfac5a3 | Florent Chuffart | return(tr_roi) |
1563 | 5bfac5a3 | Florent Chuffart | }) |
1564 | 5bfac5a3 | Florent Chuffart | tr_rois = do.call(rbind, tr_rois) |
1565 | 5bfac5a3 | Florent Chuffart | return(tr_rois) |
1566 | 5bfac5a3 | Florent Chuffart | } |
1567 | 729c934e | Florent Chuffart | |
1568 | 5bfac5a3 | Florent Chuffart | combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM")) |
1569 | 5bfac5a3 | Florent Chuffart | rois = list() |
1570 | 5bfac5a3 | Florent Chuffart | for (combi in combis) { |
1571 | 5bfac5a3 | Florent Chuffart | strain1 = combi[1] |
1572 | 5bfac5a3 | Florent Chuffart | strain2 = combi[2] |
1573 | 5bfac5a3 | Florent Chuffart | print(paste(strain1, strain2)) |
1574 | 5bfac5a3 | Florent Chuffart | rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config) |
1575 | 5bfac5a3 | Florent Chuffart | strain1 = combi[2] |
1576 | 5bfac5a3 | Florent Chuffart | strain2 = combi[1] |
1577 | 5bfac5a3 | Florent Chuffart | print(paste(strain1, strain2)) |
1578 | 5bfac5a3 | Florent Chuffart | rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config) |
1579 | 4016229d | Florent Chuffart | tr_rois_rev = translate_curs(rois_rev, combi[1], config) |
1580 | 5bfac5a3 | Florent Chuffart | region1 = rois_fwd |
1581 | 5bfac5a3 | Florent Chuffart | region2 = tr_rois_rev |
1582 | 5bfac5a3 | Florent Chuffart | rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev) |
1583 | 5bfac5a3 | Florent Chuffart | } |
1584 | 5bfac5a3 | Florent Chuffart | reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]]) |
1585 | 5bfac5a3 | Florent Chuffart | reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ] |
1586 | 4016229d | Florent Chuffart | tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config) |
1587 | 5bfac5a3 | Florent Chuffart | reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]]) |
1588 | 5bfac5a3 | Florent Chuffart | reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ] |
1589 | 4016229d | Florent Chuffart | reducted_rois = translate_curs(reducted_2_rois, "BY", config) |
1590 | 5bfac5a3 | Florent Chuffart | reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ] |
1591 | 5bfac5a3 | Florent Chuffart | squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config) |
1592 | 5bfac5a3 | Florent Chuffart | return (squeezed_rois) |
1593 | 1d833b97 | Florent Chuffart | } |
1594 | 1d833b97 | Florent Chuffart | |
1595 | 5bfac5a3 | Florent Chuffart | intersect_region = function(# Returns the intersection of 2 list on regions. |
1596 | 5bfac5a3 | Florent Chuffart | ### This function... |
1597 | 5bfac5a3 | Florent Chuffart | region1, ##<< Original regions. |
1598 | 5bfac5a3 | Florent Chuffart | region2 ##<< Regions to intersect. |
1599 | 5bfac5a3 | Florent Chuffart | ) { |
1600 | 5bfac5a3 | Florent Chuffart | intersection = apply(t(1:nrow(region1)), 2, function(i) { |
1601 | 5bfac5a3 | Florent Chuffart | roi1 = region1[i, ] |
1602 | 5bfac5a3 | Florent Chuffart | sub_regions2 = region2[region2$chr == roi1$chr, ] |
1603 | 5bfac5a3 | Florent Chuffart | sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end | |
1604 | 5bfac5a3 | Florent Chuffart | roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end | |
1605 | 5bfac5a3 | Florent Chuffart | sub_regions2$begin < roi1$begin & roi1$end < sub_regions2$end |
1606 | 5bfac5a3 | Florent Chuffart | , ] |
1607 | 5bfac5a3 | Florent Chuffart | if (nrow(sub_regions2) == 0) { |
1608 | 5bfac5a3 | Florent Chuffart | print("removing a region") |
1609 | 5bfac5a3 | Florent Chuffart | return(NULL) |
1610 | 5bfac5a3 | Florent Chuffart | } else if (nrow(sub_regions2) > 1) { |
1611 | 5bfac5a3 | Florent Chuffart | print("more than one region in intersect_region") |
1612 | 5bfac5a3 | Florent Chuffart | return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])}))) |
1613 | 5bfac5a3 | Florent Chuffart | } else { |
1614 | 5bfac5a3 | Florent Chuffart | roi2 = sub_regions2[1,] |
1615 | 5bfac5a3 | Florent Chuffart | if (roi1$begin < roi2$begin) { |
1616 | 5bfac5a3 | Florent Chuffart | print("not the same begin") |
1617 | 5bfac5a3 | Florent Chuffart | roi1$begin = roi2$begin |
1618 | 5bfac5a3 | Florent Chuffart | roi1$length = roi1$end - roi1$begin + 1 |
1619 | 5bfac5a3 | Florent Chuffart | } |
1620 | 5bfac5a3 | Florent Chuffart | if (roi1$end > roi2$end) { |
1621 | 5bfac5a3 | Florent Chuffart | print("not the same end") |
1622 | 5bfac5a3 | Florent Chuffart | roi1$end = roi2$end |
1623 | 5bfac5a3 | Florent Chuffart | roi1$length = roi1$end - roi1$begin + 1 |
1624 | 5bfac5a3 | Florent Chuffart | } |
1625 | 5bfac5a3 | Florent Chuffart | return(roi1) |
1626 | 5bfac5a3 | Florent Chuffart | } |
1627 | 5bfac5a3 | Florent Chuffart | }) |
1628 | 5bfac5a3 | Florent Chuffart | return(do.call(rbind,intersection)) |
1629 | 5bfac5a3 | Florent Chuffart | } |
1630 | 1d833b97 | Florent Chuffart | |
1631 | 55c1cdff | Florent Chuffart | build_replicates = structure(function(# Stage replicates data |
1632 | e5603c3f | Florent Chuffart | ### This function loads in memory the data corresponding to the given experiments. |
1633 | e5603c3f | Florent Chuffart | expe, ##<< a list of vectors corresponding to replicates. |
1634 | 61914235 | Florent Chuffart | roi, ##<< the region that we are interested in. |
1635 | 61914235 | Florent Chuffart | only_fetch=FALSE, ##<< filter or not inputs. |
1636 | 61914235 | Florent Chuffart | get_genome=FALSE,##<< Load or not corresponding genome. |
1637 | 61914235 | Florent Chuffart | all_samples, ##<< Global list of samples. |
1638 | 61914235 | Florent Chuffart | config=NULL ##<< GLOBAL config variable. |
1639 | 61914235 | Florent Chuffart | ) { |
1640 | 61914235 | Florent Chuffart | build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) { |
1641 | 61914235 | Florent Chuffart | samples=list() |
1642 | 61914235 | Florent Chuffart | for (i in samples_ids) { |
1643 | 61914235 | Florent Chuffart | sample = as.list(all_samples[all_samples$id==i,]) |
1644 | a0b91fee | Florent Chuffart | sample$orig_roi = roi |
1645 | 4016229d | Florent Chuffart | sample$roi = translate_cur(roi, sample$strain, config = config) |
1646 | 61914235 | Florent Chuffart | if (get_genome) { |
1647 | 61914235 | Florent Chuffart | # Get Genome |
1648 | 61914235 | Florent Chuffart | fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]] |
1649 | 5bfac5a3 | Florent Chuffart | sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
1650 | 61914235 | Florent Chuffart | } |
1651 | 61914235 | Florent Chuffart | # Get inputs |
1652 | 61914235 | Florent Chuffart | sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="") |
1653 | 55c1cdff | Florent Chuffart | sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE) |
1654 | 5bfac5a3 | Florent Chuffart | sample$total_reads = sum(sample$inputs[,4]) |
1655 | 61914235 | Florent Chuffart | if (!only_fetch) { |
1656 | b8a95426 | Florent Chuffart | sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE) |
1657 | 61914235 | Florent Chuffart | } |
1658 | 61914235 | Florent Chuffart | # Get TF outputs for Mnase_Seq samples |
1659 | 5bfac5a3 | Florent Chuffart | if (sample$marker == "Mnase_Seq" & get_ouputs) { |
1660 | 61914235 | Florent Chuffart | sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="") |
1661 | 61914235 | Florent Chuffart | sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t") |
1662 | 61914235 | Florent Chuffart | if (!only_fetch) { |
1663 | 61914235 | Florent Chuffart | sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
1664 | 61914235 | Florent Chuffart | } |
1665 | 61914235 | Florent Chuffart | } |
1666 | 5bfac5a3 | Florent Chuffart | samples[[length(samples) + 1]] = sample |
1667 | 61914235 | Florent Chuffart | } |
1668 | 61914235 | Florent Chuffart | return(samples) |
1669 | 61914235 | Florent Chuffart | } |
1670 | 61914235 | Florent Chuffart | replicates = list() |
1671 | 61914235 | Florent Chuffart | for(samples_ids in expe) { |
1672 | 61914235 | Florent Chuffart | samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples) |
1673 | 61914235 | Florent Chuffart | replicates[[length(replicates) + 1]] = samples |
1674 | 61914235 | Florent Chuffart | } |
1675 | 61914235 | Florent Chuffart | return(replicates) |
1676 | 61914235 | Florent Chuffart | }, ex = function() { |
1677 | 61914235 | Florent Chuffart | # library(rjson) |
1678 | 61914235 | Florent Chuffart | # library(nucleominer) |
1679 | 55c1cdff | Florent Chuffart | # |
1680 | 61914235 | Florent Chuffart | # # Read config file |
1681 | e5603c3f | Florent Chuffart | # json_conf_file = "nucleominer_config.json" |
1682 | 61914235 | Florent Chuffart | # config = fromJSON(paste(readLines(json_conf_file), collapse="")) |
1683 | 61914235 | Florent Chuffart | # # Read sample file |
1684 | 55c1cdff | Florent Chuffart | # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE) |
1685 | 61914235 | Florent Chuffart | # # here are the sample ids in a list |
1686 | 61914235 | Florent Chuffart | # expes = list(c(1)) |
1687 | 61914235 | Florent Chuffart | # # here is the region that we wnt to see the coverage |
1688 | 55c1cdff | Florent Chuffart | # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY") |
1689 | 61914235 | Florent Chuffart | # # it displays the corverage |
1690 | 61914235 | Florent Chuffart | # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config) |
1691 | 55c1cdff | Florent Chuffart | # out = watch_samples(replicates, config$READ_LENGTH, |
1692 | 55c1cdff | Florent Chuffart | # plot_coverage = TRUE, |
1693 | 55c1cdff | Florent Chuffart | # plot_squared_reads = FALSE, |
1694 | 55c1cdff | Florent Chuffart | # plot_ref_genome = FALSE, |
1695 | 55c1cdff | Florent Chuffart | # plot_arrow_raw_reads = FALSE, |
1696 | 55c1cdff | Florent Chuffart | # plot_arrow_nuc_reads = FALSE, |
1697 | 55c1cdff | Florent Chuffart | # plot_gaussian_reads = FALSE, |
1698 | 55c1cdff | Florent Chuffart | # plot_gaussian_unified_reads = FALSE, |
1699 | 55c1cdff | Florent Chuffart | # plot_ellipse_nucs = FALSE, |
1700 | 55c1cdff | Florent Chuffart | # plot_wp_nucs = FALSE, |
1701 | 55c1cdff | Florent Chuffart | # plot_wp_nuc_model = FALSE, |
1702 | 55c1cdff | Florent Chuffart | # plot_common_nucs = FALSE, |
1703 | 61914235 | Florent Chuffart | # height = 50) |
1704 | 61914235 | Florent Chuffart | }) |
1705 | 61914235 | Florent Chuffart | |
1706 | 61914235 | Florent Chuffart | |
1707 | 61914235 | Florent Chuffart | |
1708 | 61914235 | Florent Chuffart | |
1709 | 61914235 | Florent Chuffart | |
1710 | 61914235 | Florent Chuffart | |
1711 | 61914235 | Florent Chuffart | |
1712 | 61914235 | Florent Chuffart | |
1713 | 61914235 | Florent Chuffart | |
1714 | 61914235 | Florent Chuffart | |
1715 | 61914235 | Florent Chuffart | |
1716 | 61914235 | Florent Chuffart | |
1717 | 61914235 | Florent Chuffart | |
1718 | 61914235 | Florent Chuffart | |
1719 | 61914235 | Florent Chuffart | |
1720 | 61914235 | Florent Chuffart | |
1721 | 61914235 | Florent Chuffart | |
1722 | 61914235 | Florent Chuffart | |
1723 | 61914235 | Florent Chuffart | |
1724 | 61914235 | Florent Chuffart | |
1725 | 61914235 | Florent Chuffart | |
1726 | 61914235 | Florent Chuffart | |
1727 | 61914235 | Florent Chuffart | |
1728 | 61914235 | Florent Chuffart | |
1729 | 61914235 | Florent Chuffart | |
1730 | 61914235 | Florent Chuffart | |
1731 | 61914235 | Florent Chuffart | |
1732 | 61914235 | Florent Chuffart | |
1733 | 61914235 | Florent Chuffart | |
1734 | 61914235 | Florent Chuffart | |
1735 | 61914235 | Florent Chuffart | |
1736 | 61914235 | Florent Chuffart | |
1737 | 61914235 | Florent Chuffart | |
1738 | 1d833b97 | Florent Chuffart | |
1739 | 1d833b97 | Florent Chuffart | |
1740 | 1d833b97 | Florent Chuffart | |
1741 | 1d833b97 | Florent Chuffart | |
1742 | 1d833b97 | Florent Chuffart | |
1743 | 1d833b97 | Florent Chuffart | |
1744 | 1d833b97 | Florent Chuffart | |
1745 | 1d833b97 | Florent Chuffart | |
1746 | 1d833b97 | Florent Chuffart | |
1747 | 1d833b97 | Florent Chuffart | |
1748 | 1d833b97 | Florent Chuffart | |
1749 | 1d833b97 | Florent Chuffart | |
1750 | 1d833b97 | Florent Chuffart | |
1751 | 1d833b97 | Florent Chuffart | |
1752 | 1d833b97 | Florent Chuffart | |
1753 | 1d833b97 | Florent Chuffart | |
1754 | 1d833b97 | Florent Chuffart | |
1755 | 1d833b97 | Florent Chuffart | |
1756 | 1d833b97 | Florent Chuffart | |
1757 | 1d833b97 | Florent Chuffart | |
1758 | 1d833b97 | Florent Chuffart | |
1759 | 1d833b97 | Florent Chuffart | |
1760 | 1d833b97 | Florent Chuffart | |
1761 | 1d833b97 | Florent Chuffart | |
1762 | 1d833b97 | Florent Chuffart | |
1763 | 1d833b97 | Florent Chuffart | |
1764 | 1d833b97 | Florent Chuffart | |
1765 | 1d833b97 | Florent Chuffart | |
1766 | 1d833b97 | Florent Chuffart | |
1767 | 1d833b97 | Florent Chuffart | |
1768 | 1d833b97 | Florent Chuffart | |
1769 | 1d833b97 | Florent Chuffart | |
1770 | 1d833b97 | Florent Chuffart | |
1771 | 1d833b97 | Florent Chuffart | |
1772 | 6b5a528c | Florent Chuffart | |
1773 | 6b5a528c | Florent Chuffart | |
1774 | 6b5a528c | Florent Chuffart | |
1775 | 6b5a528c | Florent Chuffart | |
1776 | 6b5a528c | Florent Chuffart | |
1777 | 6b5a528c | Florent Chuffart | |
1778 | 6b5a528c | Florent Chuffart | |
1779 | 6b5a528c | Florent Chuffart | |
1780 | 6b5a528c | Florent Chuffart | |
1781 | 6b5a528c | Florent Chuffart | |
1782 | 6b5a528c | Florent Chuffart | |
1783 | 6b5a528c | Florent Chuffart | |
1784 | 6b5a528c | Florent Chuffart | |
1785 | 6b5a528c | Florent Chuffart | |
1786 | 6b5a528c | Florent Chuffart | |
1787 | 6b5a528c | Florent Chuffart | |
1788 | 55c1cdff | Florent Chuffart | watch_samples = function(# Watching analysis of samples |
1789 | 6b5a528c | Florent Chuffart | ### This function allows to view analysis for a particuler region of the genome. |
1790 | 6b5a528c | Florent Chuffart | replicates, ##<< replicates under the form... |
1791 | 6b5a528c | Florent Chuffart | read_length, ##<< length of the reads |
1792 | 6b5a528c | Florent Chuffart | plot_ref_genome = TRUE, ##<< Plot (or not) reference genome. |
1793 | 6b5a528c | Florent Chuffart | plot_arrow_raw_reads = TRUE, ##<< Plot (or not) arrows for raw reads. |
1794 | 6b5a528c | Florent Chuffart | plot_arrow_nuc_reads = TRUE, ##<< Plot (or not) arrows for reads aasiocied to a nucleosome. |
1795 | 6b5a528c | Florent Chuffart | plot_squared_reads = TRUE, ##<< Plot (or not) reads in the square fashion. |
1796 | 6b5a528c | Florent Chuffart | plot_coverage = FALSE, ##<< Plot (or not) reads in the covergae fashion. fashion. |
1797 | 6b5a528c | Florent Chuffart | plot_gaussian_reads = TRUE, ##<< Plot (or not) gaussian model of a F anf R reads. |
1798 | 6b5a528c | Florent Chuffart | plot_gaussian_unified_reads = TRUE, ##<< Plot (or not) gaussian model of a nuc. |
1799 | 6b5a528c | Florent Chuffart | plot_ellipse_nucs = TRUE, ##<< Plot (or not) ellipse for a nuc. |
1800 | 7646593d | Florent Chuffart | change_col = TRUE, ##<< Change the color of each nucleosome. |
1801 | 6b5a528c | Florent Chuffart | plot_wp_nucs = TRUE, ##<< Plot (or not) cluster of nucs |
1802 | 3bfae55d | Florent Chuffart | plot_fuzzy_nucs = FALSE, ##<< Plot (or not) cluster of fuzzy |
1803 | 6b5a528c | Florent Chuffart | plot_wp_nuc_model = TRUE, ##<< Plot (or not) gaussian model for a cluster of nucs |
1804 | 9fdbfada | Florent Chuffart | plot_common_nucs = FALSE, ##<< Plot (or not) aligned reads. |
1805 | 9fdbfada | Florent Chuffart | plot_common_unrs = FALSE, ##<< Plot (or not) unaligned nucleosomal refgions (UNRs). |
1806 | 6b5a528c | Florent Chuffart | plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples. |
1807 | 729c934e | Florent Chuffart | plot_chain = FALSE, ##<< Plot (or not) clusterised nuceosomes between mnase samples. |
1808 | 6e0010bc | Florent Chuffart | plot_sample_id = FALSE, ##<< Plot (or not) the sample id for each sample. |
1809 | 55c1cdff | Florent Chuffart | aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. |
1810 | 6b5a528c | Florent Chuffart | aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed. |
1811 | 1d833b97 | Florent Chuffart | height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis. |
1812 | 6e0010bc | Florent Chuffart | main=NULL, ##<< main title of the produced plot |
1813 | 6e0010bc | Florent Chuffart | xlab=NULL, ##<< xlab of the produced plot |
1814 | 6e0010bc | Florent Chuffart | ylab="#reads (per million reads)", ##<< ylab of the produced plot |
1815 | 1d833b97 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
1816 | 6b5a528c | Florent Chuffart | ){ |
1817 | 6b5a528c | Florent Chuffart | returned_list = list() |
1818 | 6b5a528c | Florent Chuffart | # Computing global display parameters |
1819 | 6b5a528c | Florent Chuffart | if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) { |
1820 | 6b5a528c | Florent Chuffart | x_min_glo = replicates[[1]][[1]]$roi[["begin"]] |
1821 | 6b5a528c | Florent Chuffart | x_max_glo = replicates[[1]][[1]]$roi[["end"]] |
1822 | 6b5a528c | Florent Chuffart | } else { |
1823 | 6b5a528c | Florent Chuffart | x_min_glo = - replicates[[1]][[1]]$roi[["begin"]] |
1824 | 6b5a528c | Florent Chuffart | x_max_glo = - replicates[[1]][[1]]$roi[["end"]] |
1825 | 6b5a528c | Florent Chuffart | } |
1826 | 6b5a528c | Florent Chuffart | base_glo = 0 |
1827 | 6b5a528c | Florent Chuffart | nb_rank_glo = 0 |
1828 | 6b5a528c | Florent Chuffart | for (samples in replicates) { |
1829 | 6b5a528c | Florent Chuffart | nb_rank_glo = nb_rank_glo + length(samples) |
1830 | 6b5a528c | Florent Chuffart | } |
1831 | 6b5a528c | Florent Chuffart | ylim_glo = c(base_glo, base_glo + height * nb_rank_glo) |
1832 | 6b5a528c | Florent Chuffart | y_min_glo = min(ylim_glo) |
1833 | 6b5a528c | Florent Chuffart | y_max_glo = max(ylim_glo) |
1834 | 6b5a528c | Florent Chuffart | delta_y_glo = y_max_glo - y_min_glo |
1835 | 6b5a528c | Florent Chuffart | # Plot main frame |
1836 | 6e0010bc | Florent Chuffart | if (is.null(xlab)) { |
1837 | 6e0010bc | Florent Chuffart | xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr) |
1838 | 6e0010bc | Florent Chuffart | } |
1839 | 6e0010bc | Florent Chuffart | plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main ) |
1840 | 6b5a528c | Florent Chuffart | axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2)) |
1841 | 6b5a528c | Florent Chuffart | # Go |
1842 | 6b5a528c | Florent Chuffart | replicates_wp_nucs = list() |
1843 | 9fdbfada | Florent Chuffart | wp_maps = list() |
1844 | 9fdbfada | Florent Chuffart | fuzzy_maps = list() |
1845 | 6b5a528c | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
1846 | 6b5a528c | Florent Chuffart | # Computing replicate parameters |
1847 | 6b5a528c | Florent Chuffart | nb_rank = length(samples) |
1848 | 6b5a528c | Florent Chuffart | base = (replicate_rank-1) * height * nb_rank |
1849 | 6b5a528c | Florent Chuffart | ylim = c(base, base + height * nb_rank) |
1850 | 6b5a528c | Florent Chuffart | y_min = min(ylim) |
1851 | 6b5a528c | Florent Chuffart | y_max = max(ylim) |
1852 | 6b5a528c | Florent Chuffart | delta_y = y_max - y_min |
1853 | 6b5a528c | Florent Chuffart | samples = replicates[[replicate_rank]] |
1854 | 6b5a528c | Florent Chuffart | for (sample_rank in 1:length(samples)) { |
1855 | 6b5a528c | Florent Chuffart | # computing sample parameters |
1856 | 6b5a528c | Florent Chuffart | sample = samples[[sample_rank]] |
1857 | 6b5a528c | Florent Chuffart | y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank |
1858 | 6e0010bc | Florent Chuffart | if (plot_sample_id) { |
1859 | 6e0010bc | Florent Chuffart | text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep="")) |
1860 | 6e0010bc | Florent Chuffart | } |
1861 | 6b5a528c | Florent Chuffart | |
1862 | 6b5a528c | Florent Chuffart | if (sample$roi[["begin"]] < sample$roi[["end"]]) { |
1863 | 6b5a528c | Florent Chuffart | x_min = sample$roi[["begin"]] |
1864 | 6b5a528c | Florent Chuffart | x_max = sample$roi[["end"]] |
1865 | 6b5a528c | Florent Chuffart | } else { |
1866 | 6b5a528c | Florent Chuffart | x_min = - sample$roi[["begin"]] |
1867 | 6b5a528c | Florent Chuffart | x_max = - sample$roi[["end"]] |
1868 | 5bfac5a3 | Florent Chuffart | } |
1869 | 6b5a528c | Florent Chuffart | shift = x_min_glo - x_min |
1870 | 6b5a528c | Florent Chuffart | # Plot Genome seq |
1871 | 6b5a528c | Florent Chuffart | if (plot_ref_genome) { |
1872 | 6b5a528c | Florent Chuffart | text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier") |
1873 | 6b5a528c | Florent Chuffart | } |
1874 | 6b5a528c | Florent Chuffart | # Plot reads |
1875 | 6b5a528c | Florent Chuffart | reads = sample$inputs |
1876 | 5bfac5a3 | Florent Chuffart | signs = sign_from_strand(reads[,3]) |
1877 | 6b5a528c | Florent Chuffart | if (plot_arrow_raw_reads) { |
1878 | 55c1cdff | Florent Chuffart | arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, |
1879 | 5bfac5a3 | Florent Chuffart | col=1, length=0.15/nb_rank) |
1880 | 6b5a528c | Florent Chuffart | } |
1881 | 5bfac5a3 | Florent Chuffart | if (plot_squared_reads) { |
1882 | 6b5a528c | Florent Chuffart | # require(plotrix) |
1883 | 6b5a528c | Florent Chuffart | rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift, y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0) |
1884 | 6b5a528c | Florent Chuffart | } |
1885 | 5bfac5a3 | Florent Chuffart | if (plot_coverage) { |
1886 | a52f85d7 | Florent Chuffart | if (length(reads[,1]) != 0) { |
1887 | a52f85d7 | Florent Chuffart | step_h = sign(x_min) * signs * reads[,4] |
1888 | a52f85d7 | Florent Chuffart | step_b = sign(x_min) * reads[,2] + shift |
1889 | a52f85d7 | Florent Chuffart | step_e = sign(x_min) * (reads[,2] + signs * 150) + shift |
1890 | 55c1cdff | Florent Chuffart | steps_x = min(step_b, step_e):max(step_b, step_e) |
1891 | a52f85d7 | Florent Chuffart | steps_y = rep(0, length(steps_x)) |
1892 | 55c1cdff | Florent Chuffart | for (i in 1:length(step_h)) { |
1893 | a52f85d7 | Florent Chuffart | steps_y[which(steps_x==min(step_b[i], step_e[i]))] = steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i]) |
1894 | a52f85d7 | Florent Chuffart | steps_y[which(steps_x==max(step_b[i], step_e[i]))] = steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i]) |
1895 | a52f85d7 | Florent Chuffart | } |
1896 | a52f85d7 | Florent Chuffart | tmp_index = which(steps_y != 0) |
1897 | a52f85d7 | Florent Chuffart | steps_x = steps_x[tmp_index] |
1898 | a52f85d7 | Florent Chuffart | steps_y = steps_y[tmp_index] |
1899 | a52f85d7 | Florent Chuffart | tmp_current_level = 0 |
1900 | 55c1cdff | Florent Chuffart | for (i in 1:length(steps_y)) { |
1901 | a52f85d7 | Florent Chuffart | steps_y[i] = tmp_current_level + steps_y[i] |
1902 | a52f85d7 | Florent Chuffart | tmp_current_level = steps_y[i] |
1903 | a52f85d7 | Florent Chuffart | } |
1904 | a52f85d7 | Florent Chuffart | steps_y = c(0, steps_y) |
1905 | a52f85d7 | Florent Chuffart | steps_y = steps_y * 1000000/sample$total_reads |
1906 | a52f85d7 | Florent Chuffart | } else { |
1907 | a52f85d7 | Florent Chuffart | steps_y = c(0, 0, 0) |
1908 | 1e310114 | Florent Chuffart | steps_x = c(x_min, x_max) |
1909 | 6b5a528c | Florent Chuffart | } |
1910 | 0d1475ac | Florent Chuffart | # print(steps_x) |
1911 | 0d1475ac | Florent Chuffart | # print(steps_y) |
1912 | 6b5a528c | Florent Chuffart | lines(stepfun(steps_x, steps_y + y_lev), pch="") |
1913 | 6b5a528c | Florent Chuffart | abline(y_lev,0) |
1914 | 6b5a528c | Florent Chuffart | returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y) |
1915 | 6b5a528c | Florent Chuffart | } |
1916 | 6b5a528c | Florent Chuffart | # Plot nucs |
1917 | 6b5a528c | Florent Chuffart | if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) { |
1918 | 6b5a528c | Florent Chuffart | nucs = sample$outputs |
1919 | 6b5a528c | Florent Chuffart | if (length(nucs$center) > 0) { |
1920 | 6b5a528c | Florent Chuffart | col = 1 |
1921 | 6b5a528c | Florent Chuffart | for (i in 1:length(nucs$center)) { |
1922 | 7646593d | Florent Chuffart | if (change_col) { |
1923 | 55c1cdff | Florent Chuffart | col = col + 1 |
1924 | 6e0010bc | Florent Chuffart | } else { |
1925 | 6e0010bc | Florent Chuffart | col = "blue" |
1926 | 6e0010bc | Florent Chuffart | } |
1927 | 6b5a528c | Florent Chuffart | nuc = nucs[i,] |
1928 | 6b5a528c | Florent Chuffart | involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width) |
1929 | 6b5a528c | Florent Chuffart | involved_signs = apply(t(involved_reads[,3]), 2, function(strand) { if (strand == "F") return(1) else return(-1)}) |
1930 | 55c1cdff | Florent Chuffart | total_involved_reads = sum(involved_reads[,4]) |
1931 | 6b5a528c | Florent Chuffart | if (plot_arrow_nuc_reads ) { |
1932 | 55c1cdff | Florent Chuffart | arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, |
1933 | 5bfac5a3 | Florent Chuffart | col=col, length=0.15/nb_rank) |
1934 | 6b5a528c | Florent Chuffart | } |
1935 | 6b5a528c | Florent Chuffart | if (plot_gaussian_reads | plot_gaussian_unified_reads) { |
1936 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(involved_reads, nuc$width) |
1937 | 6b5a528c | Florent Chuffart | delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width) |
1938 | 6b5a528c | Florent Chuffart | } |
1939 | 6b5a528c | Florent Chuffart | if (plot_gaussian_reads ) { |
1940 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(involved_reads, nuc$width) |
1941 | 6b5a528c | Florent Chuffart | delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width) |
1942 | 883439d6 | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col) |
1943 | 883439d6 | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col) |
1944 | 6b5a528c | Florent Chuffart | } |
1945 | 6b5a528c | Florent Chuffart | if (plot_gaussian_unified_reads ) { |
1946 | 6e0010bc | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1) |
1947 | 6b5a528c | Florent Chuffart | } |
1948 | 6b5a528c | Florent Chuffart | if (plot_ellipse_nucs) { |
1949 | 6b5a528c | Florent Chuffart | # require(plotrix) |
1950 | 55c1cdff | Florent Chuffart | draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col) |
1951 | 6b5a528c | Florent Chuffart | } |
1952 | 6b5a528c | Florent Chuffart | } |
1953 | 6b5a528c | Florent Chuffart | } else { |
1954 | 6b5a528c | Florent Chuffart | print("WARNING! No nucs to print.") |
1955 | 6b5a528c | Florent Chuffart | } |
1956 | 6b5a528c | Florent Chuffart | } |
1957 | 6b5a528c | Florent Chuffart | } |
1958 | 55c1cdff | Florent Chuffart | |
1959 | 6b5a528c | Florent Chuffart | # Plot wp nucs |
1960 | 9fdbfada | Florent Chuffart | if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) { |
1961 | 6b5a528c | Florent Chuffart | if (samples[[1]]$marker == "Mnase_Seq") { |
1962 | 55c1cdff | Florent Chuffart | if (is.null(aggregated_intra_strain_nucs)) { |
1963 | 5bfac5a3 | Florent Chuffart | wp_nucs = aggregate_intra_strain_nucs(samples)[[1]] |
1964 | 6b5a528c | Florent Chuffart | } else { |
1965 | 6b5a528c | Florent Chuffart | wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]] |
1966 | 6b5a528c | Florent Chuffart | } |
1967 | 6b5a528c | Florent Chuffart | } else { |
1968 | 6b5a528c | Florent Chuffart | wp_nucs = replicates_wp_nucs[[replicate_rank-2]] |
1969 | 6b5a528c | Florent Chuffart | } |
1970 | 729c934e | Florent Chuffart | if (plot_chain) { |
1971 | 55c1cdff | Florent Chuffart | tf_nucs = lapply(wp_nucs, function(nuc) { |
1972 | 729c934e | Florent Chuffart | bar = apply(t(nuc$nucs), 2, function(tmp_nuc){ |
1973 | 729c934e | Florent Chuffart | tmp_nuc = tmp_nuc[[1]] |
1974 | 729c934e | Florent Chuffart | tmp_nuc$inputs = NULL |
1975 | 729c934e | Florent Chuffart | tmp_nuc$original_reads = NULL |
1976 | 729c934e | Florent Chuffart | tmp_nuc$wp = nuc$wp |
1977 | 729c934e | Florent Chuffart | # print(tmp_nuc) |
1978 | 729c934e | Florent Chuffart | return(tmp_nuc) |
1979 | 55c1cdff | Florent Chuffart | }) |
1980 | dc58ee6c | Florent Chuffart | return(do.call(rbind, bar)) |
1981 | dc58ee6c | Florent Chuffart | }) |
1982 | 729c934e | Florent Chuffart | tf_nucs = data.frame(do.call(rbind, tf_nucs)) |
1983 | 55c1cdff | Florent Chuffart | tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 |
1984 | dc58ee6c | Florent Chuffart | tmp_y = y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank |
1985 | dc58ee6c | Florent Chuffart | tmp_y_prev = tmp_y[-length(tmp_y)] |
1986 | dc58ee6c | Florent Chuffart | tmp_y_next = tmp_y[-1] |
1987 | dc58ee6c | Florent Chuffart | tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2 |
1988 | dc58ee6c | Florent Chuffart | tmp_track = unlist(tf_nucs$track) |
1989 | dc58ee6c | Florent Chuffart | tmp_track_prev = tmp_track[-length(tmp_track)] |
1990 | dc58ee6c | Florent Chuffart | tmp_track_next = tmp_track[-1] |
1991 | 6e0010bc | Florent Chuffart | # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25 |
1992 | e5603c3f | Florent Chuffart | if (is.null(config$TRACK_LLR_OFFSET)) { |
1993 | e5603c3f | Florent Chuffart | config$TRACK_LLR_OFFSET = 0 |
1994 | 6e0010bc | Florent Chuffart | } |
1995 | e5603c3f | Florent Chuffart | tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25 |
1996 | dc58ee6c | Florent Chuffart | tmp_x_prev = tmp_x[-length(tmp_x)] |
1997 | dc58ee6c | Florent Chuffart | tmp_x_next = tmp_x[-1] |
1998 | dc58ee6c | Florent Chuffart | need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50}) |
1999 | dc58ee6c | Florent Chuffart | tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift |
2000 | 7e2d37e1 | Florent Chuffart | tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2) |
2001 | 55c1cdff | Florent Chuffart | new_tmp_x = c() |
2002 | 55c1cdff | Florent Chuffart | new_tmp_y = c() |
2003 | dc58ee6c | Florent Chuffart | index_odd = 1:length(tmp_x) * 2 - 1 |
2004 | 55c1cdff | Florent Chuffart | index_even = (1:(length(tmp_x) - 1)) * 2 |
2005 | 55c1cdff | Florent Chuffart | new_tmp_x[index_odd] = tmp_x |
2006 | dc58ee6c | Florent Chuffart | new_tmp_y[index_odd] = tmp_y |
2007 | dc58ee6c | Florent Chuffart | new_tmp_x[index_even] = tmp_x_inter |
2008 | dc58ee6c | Florent Chuffart | new_tmp_y[index_even] = tmp_y_inter |
2009 | ec2936ea | Florent Chuffart | lines(new_tmp_x , new_tmp_y, lwd=2) |
2010 | dc58ee6c | Florent Chuffart | points(tmp_x, tmp_y, cex=4, pch=16, col="white") |
2011 | ec2936ea | Florent Chuffart | points(tmp_x, tmp_y, cex=4, lwd=2) |
2012 | dc58ee6c | Florent Chuffart | text(tmp_x, tmp_y, 1:nrow(tf_nucs)) |
2013 | e5603c3f | Florent Chuffart | if (is.null(config$LEGEND_LLR_POS)) { |
2014 | 6e0010bc | Florent Chuffart | pos = 2 |
2015 | 6e0010bc | Florent Chuffart | } else { |
2016 | e5603c3f | Florent Chuffart | pos = config$LEGEND_LLR_POS |
2017 | 6e0010bc | Florent Chuffart | } |
2018 | 7e2d37e1 | Florent Chuffart | col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")}) |
2019 | 7e2d37e1 | Florent Chuffart | text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) |
2020 | 37b11e9b | Florent Chuffart | |
2021 | 729c934e | Florent Chuffart | } |
2022 | 55c1cdff | Florent Chuffart | |
2023 | 9fdbfada | Florent Chuffart | if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) { |
2024 | 729c934e | Florent Chuffart | replicates_wp_nucs[[replicate_rank]] = wp_nucs |
2025 | 9fdbfada | Florent Chuffart | strain = samples[[1]]$strain |
2026 | cc54d805 | Florent Chuffart | nb_tracks = length(replicates[[1]]) |
2027 | cc54d805 | Florent Chuffart | # print(paste("**************"), nb_tracks) |
2028 | d973538c | Florent Chuffart | wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks) |
2029 | 9fdbfada | Florent Chuffart | fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config) |
2030 | 9fdbfada | Florent Chuffart | |
2031 | 9fdbfada | Florent Chuffart | if (plot_fuzzy_nucs) { |
2032 | 9fdbfada | Florent Chuffart | fuzzy_map = fuzzy_maps[[strain]] |
2033 | 4016229d | Florent Chuffart | if (!is.null(fuzzy_map)) { |
2034 | 4016229d | Florent Chuffart | if (nrow(fuzzy_map) > 0) { |
2035 | 4016229d | Florent Chuffart | rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1) |
2036 | 4016229d | Florent Chuffart | } |
2037 | d492bca4 | Florent Chuffart | } |
2038 | 9fdbfada | Florent Chuffart | } |
2039 | 9fdbfada | Florent Chuffart | |
2040 | 9fdbfada | Florent Chuffart | if (plot_wp_nucs) { |
2041 | 9fdbfada | Florent Chuffart | for (wp_nuc in wp_nucs) { |
2042 | 9fdbfada | Florent Chuffart | if (wp_nuc$wp){ |
2043 | 9fdbfada | Florent Chuffart | rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1) |
2044 | 9fdbfada | Florent Chuffart | if (plot_wp_nuc_model) { |
2045 | 9fdbfada | Florent Chuffart | all_original_reads = c() |
2046 | 9fdbfada | Florent Chuffart | for(initial_nuc in wp_nuc$nucs) { |
2047 | 9fdbfada | Florent Chuffart | all_original_reads = c(all_original_reads, initial_nuc$original_reads) |
2048 | 9fdbfada | Florent Chuffart | } |
2049 | 9fdbfada | Florent Chuffart | delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound |
2050 | 9fdbfada | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1) |
2051 | 9fdbfada | Florent Chuffart | } |
2052 | 729c934e | Florent Chuffart | } |
2053 | 729c934e | Florent Chuffart | } |
2054 | 729c934e | Florent Chuffart | } |
2055 | 729c934e | Florent Chuffart | } |
2056 | 6b5a528c | Florent Chuffart | } |
2057 | 5bfac5a3 | Florent Chuffart | } |
2058 | 55c1cdff | Florent Chuffart | |
2059 | 5bfac5a3 | Florent Chuffart | if (plot_common_nucs) { |
2060 | 9fdbfada | Florent Chuffart | if (is.null(aligned_inter_strain_nucs)) { |
2061 | 9fdbfada | Florent Chuffart | aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]] |
2062 | b69443b5 | Florent Chuffart | if (!is.null(aligned_inter_strain_nucs)) { |
2063 | 4016229d | Florent Chuffart | aligned_inter_strain_nucs$cur_index = "foo" |
2064 | b69443b5 | Florent Chuffart | } |
2065 | 9fdbfada | Florent Chuffart | } |
2066 | 55c1cdff | Florent Chuffart | |
2067 | 9fdbfada | Florent Chuffart | #Plot common wp nucs |
2068 | 9fdbfada | Florent Chuffart | mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list() |
2069 | 9fdbfada | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
2070 | 9fdbfada | Florent Chuffart | nb_rank[[replicate_rank]] = length(samples) |
2071 | 9fdbfada | Florent Chuffart | base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]] |
2072 | 9fdbfada | Florent Chuffart | ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]]) |
2073 | 9fdbfada | Florent Chuffart | y_min[[replicate_rank]] = min(ylim[[replicate_rank]]) |
2074 | 9fdbfada | Florent Chuffart | y_max[[replicate_rank]] = max(ylim[[replicate_rank]]) |
2075 | 9fdbfada | Florent Chuffart | delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]] |
2076 | 9fdbfada | Florent Chuffart | mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2 |
2077 | 9fdbfada | Florent Chuffart | |
2078 | 9fdbfada | Florent Chuffart | samples = replicates[[replicate_rank]] |
2079 | 9fdbfada | Florent Chuffart | for (sample_rank in 1:length(samples)) { |
2080 | 9fdbfada | Florent Chuffart | sample = samples[[sample_rank]] |
2081 | 9fdbfada | Florent Chuffart | y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]] |
2082 | 9fdbfada | Florent Chuffart | if (sample$roi[["begin"]] < sample$roi[["end"]]) { |
2083 | 9fdbfada | Florent Chuffart | x_min[[replicate_rank]] = sample$roi[["begin"]] |
2084 | 9fdbfada | Florent Chuffart | x_max[[replicate_rank]] = sample$roi[["end"]] |
2085 | 9fdbfada | Florent Chuffart | } else { |
2086 | 9fdbfada | Florent Chuffart | x_min[[replicate_rank]] = - sample$roi[["begin"]] |
2087 | 9fdbfada | Florent Chuffart | x_max[[replicate_rank]] = - sample$roi[["end"]] |
2088 | 6b5a528c | Florent Chuffart | } |
2089 | 9fdbfada | Florent Chuffart | shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]] |
2090 | 55c1cdff | Florent Chuffart | } |
2091 | 9fdbfada | Florent Chuffart | } |
2092 | b69443b5 | Florent Chuffart | print(aligned_inter_strain_nucs) |
2093 | b69443b5 | Florent Chuffart | if (!is.null(aligned_inter_strain_nucs)) { |
2094 | b69443b5 | Florent Chuffart | for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) { |
2095 | b69443b5 | Florent Chuffart | inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,] |
2096 | b69443b5 | Florent Chuffart | tmp_xs = tmp_ys = c() |
2097 | b69443b5 | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
2098 | b69443b5 | Florent Chuffart | samples = replicates[[replicate_rank]] |
2099 | b69443b5 | Florent Chuffart | strain = samples[[1]]$strain |
2100 | b69443b5 | Florent Chuffart | tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]]) |
2101 | b69443b5 | Florent Chuffart | tmp_ys = c(tmp_ys, mid_y[[replicate_rank]]) |
2102 | b69443b5 | Florent Chuffart | } |
2103 | 37b11e9b | Florent Chuffart | lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19) |
2104 | 55c1cdff | Florent Chuffart | } |
2105 | 9fdbfada | Florent Chuffart | } |
2106 | 9fdbfada | Florent Chuffart | |
2107 | 9fdbfada | Florent Chuffart | if (plot_common_unrs) { |
2108 | 9fdbfada | Florent Chuffart | combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain) |
2109 | 9fdbfada | Florent Chuffart | roi = as.list(samples[[1]]$roi) |
2110 | 4016229d | Florent Chuffart | cur_index = "foo" |
2111 | 9fdbfada | Florent Chuffart | common_nuc_results = list() |
2112 | 9fdbfada | Florent Chuffart | common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs |
2113 | 4016229d | Florent Chuffart | unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) |
2114 | ec2936ea | Florent Chuffart | rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05)) |
2115 | 9fdbfada | Florent Chuffart | } |
2116 | 9fdbfada | Florent Chuffart | |
2117 | 6b5a528c | Florent Chuffart | } |
2118 | 6b5a528c | Florent Chuffart | return(returned_list) |
2119 | 6b5a528c | Florent Chuffart | } |
2120 | 6b5a528c | Florent Chuffart | |
2121 | 6b5a528c | Florent Chuffart | |
2122 | 6b5a528c | Florent Chuffart | |
2123 | 9fdbfada | Florent Chuffart | get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain. |
2124 | 9fdbfada | Florent Chuffart | ### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions. |
2125 | 0a13b5e3 | Florent Chuffart | wp_map, ##<< Well positionned nucleosomes map. |
2126 | 9fdbfada | Florent Chuffart | roi, ##<< The region of interest. |
2127 | 9fdbfada | Florent Chuffart | strain, ##<< The strain we want to extracvt the fuzzy map. |
2128 | 9fdbfada | Florent Chuffart | config=NULL ##<< GLOBAL config variable. |
2129 | 9fdbfada | Florent Chuffart | ) { |
2130 | 9fdbfada | Florent Chuffart | fuzzy_map = wp_map[wp_map$wp==0, ] |
2131 | 9fdbfada | Florent Chuffart | if (nrow(fuzzy_map) > 0) { |
2132 | 9fdbfada | Florent Chuffart | fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,]) |
2133 | 9fdbfada | Florent Chuffart | if (!is.null(fuzzy_map)) { |
2134 | 9fdbfada | Florent Chuffart | fuzzy_map = union_regions(fuzzy_map) |
2135 | 9fdbfada | Florent Chuffart | fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config) |
2136 | 9fdbfada | Florent Chuffart | } |
2137 | 9fdbfada | Florent Chuffart | } |
2138 | 9fdbfada | Florent Chuffart | return(fuzzy_map) |
2139 | 9fdbfada | Florent Chuffart | } |
2140 | 9fdbfada | Florent Chuffart | |
2141 | 9fdbfada | Florent Chuffart | |
2142 | 9fdbfada | Florent Chuffart | |
2143 | 9fdbfada | Florent Chuffart | |
2144 | 9fdbfada | Florent Chuffart | |
2145 | 9fdbfada | Florent Chuffart | get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs). |
2146 | 9fdbfada | Florent Chuffart | ### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline. |
2147 | 9fdbfada | Florent Chuffart | combi, ##<< The strain combination to consider. |
2148 | 9fdbfada | Florent Chuffart | roi, ##<< The region of interest. |
2149 | 4016229d | Florent Chuffart | cur_index, ##<< The region of interest index. |
2150 | 9fdbfada | Florent Chuffart | wp_maps, ##<< Well positionned nucleosomes maps. |
2151 | 9fdbfada | Florent Chuffart | fuzzy_maps, ##<< Fuzzy nucleosomes maps. |
2152 | 9fdbfada | Florent Chuffart | common_nuc_results, ##<< Common wp nuc maps |
2153 | 9fdbfada | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
2154 | 9fdbfada | Florent Chuffart | ) { |
2155 | 4016229d | Florent Chuffart | # print(cur_index) |
2156 | 9fdbfada | Florent Chuffart | |
2157 | 9fdbfada | Florent Chuffart | tmp_combi_key = paste(combi[1], combi[2], sep="_") |
2158 | 9fdbfada | Florent Chuffart | tmp_common_nucs = common_nuc_results[[tmp_combi_key]] |
2159 | 4016229d | Florent Chuffart | tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ] |
2160 | 9fdbfada | Florent Chuffart | |
2161 | f0703205 | Florent Chuffart | # print(paste("Dealing with unr from", combi[1])) |
2162 | 9fdbfada | Florent Chuffart | tmp_fuzzy = fuzzy_maps[[combi[1]]] |
2163 | 4016229d | Florent Chuffart | tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ] |
2164 | 9fdbfada | Florent Chuffart | tmp_wp = wp_maps[[combi[1]]] |
2165 | 9fdbfada | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$wp==1,] |
2166 | 4016229d | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ] |
2167 | 9fdbfada | Florent Chuffart | # Let's go! |
2168 | 9fdbfada | Florent Chuffart | tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) { |
2169 | 9fdbfada | Florent Chuffart | if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) { |
2170 | 9fdbfada | Florent Chuffart | return(NULL) |
2171 | 9fdbfada | Florent Chuffart | } else { |
2172 | 9fdbfada | Florent Chuffart | return (index_nuc) |
2173 | 9fdbfada | Florent Chuffart | } |
2174 | 9fdbfada | Florent Chuffart | })) |
2175 | 9fdbfada | Florent Chuffart | tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ] |
2176 | 9fdbfada | Florent Chuffart | tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4]) |
2177 | 9fdbfada | Florent Chuffart | if (length(tmp_unr) != 0) { |
2178 | 9fdbfada | Florent Chuffart | tmp_unr = union_regions(tmp_unr) |
2179 | 9fdbfada | Florent Chuffart | } |
2180 | 9fdbfada | Florent Chuffart | tmp_unr_nucs_1 = tmp_unr |
2181 | 9fdbfada | Florent Chuffart | if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)} |
2182 | 9fdbfada | Florent Chuffart | agg_unr_1 = tmp_unr_nucs_1 |
2183 | 9fdbfada | Florent Chuffart | |
2184 | f0703205 | Florent Chuffart | # print(paste("Dealing with unr from ", combi[2])) |
2185 | 9fdbfada | Florent Chuffart | tmp_fuzzy = fuzzy_maps[[combi[2]]] |
2186 | 4016229d | Florent Chuffart | tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ] |
2187 | 9fdbfada | Florent Chuffart | tmp_wp = wp_maps[[combi[2]]] |
2188 | 9fdbfada | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$wp==1,] |
2189 | 4016229d | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ] |
2190 | 9fdbfada | Florent Chuffart | # Let's go! |
2191 | 9fdbfada | Florent Chuffart | tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) { |
2192 | 9fdbfada | Florent Chuffart | if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) { |
2193 | 9fdbfada | Florent Chuffart | return(NULL) |
2194 | 9fdbfada | Florent Chuffart | } else { |
2195 | 9fdbfada | Florent Chuffart | return (index_nuc) |
2196 | 9fdbfada | Florent Chuffart | } |
2197 | 9fdbfada | Florent Chuffart | })) |
2198 | 9fdbfada | Florent Chuffart | tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ] |
2199 | 9fdbfada | Florent Chuffart | tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4]) |
2200 | 9fdbfada | Florent Chuffart | if (length(tmp_unr) != 0) { |
2201 | 9fdbfada | Florent Chuffart | tmp_unr = union_regions(tmp_unr) |
2202 | 9fdbfada | Florent Chuffart | } |
2203 | 9fdbfada | Florent Chuffart | tmp_unr_nucs_2 = tmp_unr |
2204 | 9fdbfada | Florent Chuffart | if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)} |
2205 | 9fdbfada | Florent Chuffart | agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config) |
2206 | 4016229d | Florent Chuffart | tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config) |
2207 | 9fdbfada | Florent Chuffart | tr_agg_unr_2 = union_regions(tr_agg_unr_2) |
2208 | 9fdbfada | Florent Chuffart | |
2209 | f0703205 | Florent Chuffart | # print("Dealing with unr from both...") |
2210 | 9fdbfada | Florent Chuffart | all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2)) |
2211 | 9fdbfada | Florent Chuffart | |
2212 | f0703205 | Florent Chuffart | # print(paste("Dealing with wp from", combi[1])) |
2213 | 9fdbfada | Florent Chuffart | tmp_wp = wp_maps[[combi[1]]] |
2214 | 9fdbfada | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$wp==1,] |
2215 | 4016229d | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ] |
2216 | 9fdbfada | Florent Chuffart | # Let's go! |
2217 | 9fdbfada | Florent Chuffart | tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) { |
2218 | 9fdbfada | Florent Chuffart | if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) { |
2219 | 9fdbfada | Florent Chuffart | return (index_nuc) |
2220 | 9fdbfada | Florent Chuffart | } else { |
2221 | 9fdbfada | Florent Chuffart | return(NULL) |
2222 | 9fdbfada | Florent Chuffart | } |
2223 | 9fdbfada | Florent Chuffart | })) |
2224 | 9fdbfada | Florent Chuffart | wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ] |
2225 | 9fdbfada | Florent Chuffart | |
2226 | f0703205 | Florent Chuffart | # print(paste("Dealing with wp from", combi[2])) |
2227 | 9fdbfada | Florent Chuffart | tmp_wp = wp_maps[[combi[2]]] |
2228 | 9fdbfada | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$wp==1,] |
2229 | 4016229d | Florent Chuffart | tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ] |
2230 | 9fdbfada | Florent Chuffart | # Let's go! |
2231 | 9fdbfada | Florent Chuffart | tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) { |
2232 | 9fdbfada | Florent Chuffart | if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) { |
2233 | 9fdbfada | Florent Chuffart | return (index_nuc) |
2234 | 9fdbfada | Florent Chuffart | } else { |
2235 | 9fdbfada | Florent Chuffart | return(NULL) |
2236 | 9fdbfada | Florent Chuffart | } |
2237 | 9fdbfada | Florent Chuffart | })) |
2238 | 9fdbfada | Florent Chuffart | wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ] |
2239 | 9fdbfada | Florent Chuffart | wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config) |
2240 | 9fdbfada | Florent Chuffart | if (nrow(wp_nucs_2) == 0) { |
2241 | 9fdbfada | Florent Chuffart | tr_wp_nucs_2 = wp_nucs_2 |
2242 | 9fdbfada | Florent Chuffart | } else { |
2243 | 4016229d | Florent Chuffart | tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config) |
2244 | 9fdbfada | Florent Chuffart | } |
2245 | 9fdbfada | Florent Chuffart | |
2246 | f0703205 | Florent Chuffart | # print("Dealing with wp from both...") |
2247 | 9fdbfada | Florent Chuffart | all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2)) |
2248 | 9fdbfada | Florent Chuffart | |
2249 | f0703205 | Florent Chuffart | # print("Dealing with unr and wp...") |
2250 | 9fdbfada | Florent Chuffart | non_inter_unr = substract_region(all_unr, all_wp) |
2251 | 9fdbfada | Florent Chuffart | non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config) |
2252 | 9fdbfada | Florent Chuffart | if (is.null(non_inter_unr)) { return(NULL) } |
2253 | 9fdbfada | Florent Chuffart | non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound |
2254 | 37b11e9b | Florent Chuffart | min_unr_width = 75 |
2255 | 37b11e9b | Florent Chuffart | non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,] |
2256 | 9fdbfada | Florent Chuffart | |
2257 | 9fdbfada | Florent Chuffart | non_inter_unr$index_nuc = 1:length(non_inter_unr[,1]) |
2258 | 9fdbfada | Florent Chuffart | return (non_inter_unr) |
2259 | 9fdbfada | Florent Chuffart | } |
2260 | 9fdbfada | Florent Chuffart | |
2261 | 9fdbfada | Florent Chuffart |