root / src / R / nucleominer.R @ 7646593d
Historique | Voir | Annoter | Télécharger (92,22 ko)
1 | 6b5a528c | Florent Chuffart | # get_content = function(# Get content from cached |
---|---|---|---|
2 | 6b5a528c | Florent Chuffart | # ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed. |
3 | 6b5a528c | Florent Chuffart | # obj, ##<< The object that we want its content. |
4 | 6b5a528c | Florent Chuffart | # ... ##<< Parameters that will be passed to my_read |
5 | 6b5a528c | Florent Chuffart | # ) { |
6 | 6b5a528c | Florent Chuffart | # UseMethod("get_content", obj) |
7 | 6b5a528c | Florent Chuffart | # ### Returns the cached content of the current object. |
8 | 6b5a528c | Florent Chuffart | # } |
9 | 6b5a528c | Florent Chuffart | # |
10 | 6b5a528c | Florent Chuffart | # get_content.default = structure( function(# Get content from cached |
11 | 6b5a528c | Florent Chuffart | # ### Acces to the cached content of a file via the global variable NM_CACHE. Load content in NM_CACHE if needed. |
12 | 6b5a528c | Florent Chuffart | # obj, ##<< The object that we want its content. |
13 | 6b5a528c | Florent Chuffart | # ... ##<< Parameters that will be passed to my_read |
14 | 6b5a528c | Florent Chuffart | # ) { |
15 | 6b5a528c | Florent Chuffart | # if (inherits(try(NM_CACHE,TRUE), "try-error") || is.null(NM_CACHE)) { |
16 | 6b5a528c | Florent Chuffart | # NM_CACHE <<- list() |
17 | 6b5a528c | Florent Chuffart | # } |
18 | 6b5a528c | Florent Chuffart | # if (is.null(NM_CACHE[[obj$filename]])) { |
19 | 6b5a528c | Florent Chuffart | # print(paste("Loading file ",obj$filename, sep="")) |
20 | 6b5a528c | Florent Chuffart | # tmp_content = my_read(obj, ...) |
21 | 6b5a528c | Florent Chuffart | # print("affect it...") |
22 | 6b5a528c | Florent Chuffart | # NM_CACHE[[obj$filename]] <<- tmp_content |
23 | 6b5a528c | Florent Chuffart | # print("done.") |
24 | 6b5a528c | Florent Chuffart | # } |
25 | 6b5a528c | Florent Chuffart | # return(NM_CACHE[[obj$filename]]) |
26 | 6b5a528c | Florent Chuffart | # ### Returns the cached content of the current object. |
27 | 6b5a528c | Florent Chuffart | # }, ex=function(){ |
28 | 6b5a528c | Florent Chuffart | # # Create a dataframe |
29 | 6b5a528c | Florent Chuffart | # df = NULL |
30 | 6b5a528c | Florent Chuffart | # df = dfadd(df, list(key1 = "value1", key2 = "value2")) |
31 | 6b5a528c | Florent Chuffart | # df = dfadd(df, list(key1 = "value1'", key2 = "value3'")) |
32 | 6b5a528c | Florent Chuffart | # # Dump it into tmp file |
33 | 6b5a528c | Florent Chuffart | # write.table(df, file="/tmp/tmp.dump.table.tmp") |
34 | 6b5a528c | Florent Chuffart | # # Load it into cache using feature. |
35 | 6b5a528c | Florent Chuffart | # cached_content_object = list(filename="/tmp/tmp.dump.table.tmp") |
36 | 6b5a528c | Florent Chuffart | # class(cached_content_object) = "table" |
37 | 6b5a528c | Florent Chuffart | # # First time it will be load into cache |
38 | 6b5a528c | Florent Chuffart | # print(get_content(cached_content_object)) |
39 | 6b5a528c | Florent Chuffart | # # Second time not |
40 | 6b5a528c | Florent Chuffart | # print(get_content(cached_content_object)) |
41 | 6b5a528c | Florent Chuffart | # |
42 | 6b5a528c | Florent Chuffart | # }) |
43 | 6b5a528c | Florent Chuffart | # |
44 | 6b5a528c | Florent Chuffart | # my_read = function( |
45 | 6b5a528c | Florent Chuffart | # ### Abstract my_read function. |
46 | 6b5a528c | Florent Chuffart | # obj, ...) { |
47 | 6b5a528c | Florent Chuffart | # UseMethod("my_read", obj) |
48 | 6b5a528c | Florent Chuffart | # } |
49 | 6b5a528c | Florent Chuffart | # |
50 | 6b5a528c | Florent Chuffart | # my_read.default = function( |
51 | 6b5a528c | Florent Chuffart | # ### Default my_read function. |
52 | 6b5a528c | Florent Chuffart | # obj, ...){ |
53 | 6b5a528c | Florent Chuffart | # stop(paste("ERROR, my_read is not defined for any Objects (file ", obj$filename," )", sep="")) |
54 | 6b5a528c | Florent Chuffart | # } |
55 | 6b5a528c | Florent Chuffart | # |
56 | 6b5a528c | Florent Chuffart | # my_read.fasta = function( |
57 | 6b5a528c | Florent Chuffart | # ### my_read function for fasta files. |
58 | 6b5a528c | Florent Chuffart | # obj, ...){ |
59 | 6b5a528c | Florent Chuffart | # # require(seqinr) |
60 | 6b5a528c | Florent Chuffart | # return(read.fasta(obj$filename, ...)) |
61 | 6b5a528c | Florent Chuffart | # } |
62 | 6b5a528c | Florent Chuffart | # |
63 | 6b5a528c | Florent Chuffart | # my_read.table = function( |
64 | 6b5a528c | Florent Chuffart | # ### my_read function for table files. |
65 | 6b5a528c | Florent Chuffart | # obj, ...){ |
66 | 6b5a528c | Florent Chuffart | # if (rev(unlist(strsplit(obj$filename, ".", fixed=TRUE)))[1] == "gz") { |
67 | 6b5a528c | Florent Chuffart | # return(read.table(file=gzfile(obj$filename), ...)) |
68 | 6b5a528c | Florent Chuffart | # } else { |
69 | 6b5a528c | Florent Chuffart | # return(read.table(file=obj$filename, ...)) |
70 | 6b5a528c | Florent Chuffart | # } |
71 | 6b5a528c | Florent Chuffart | # } |
72 | 6b5a528c | Florent Chuffart | # |
73 | 6b5a528c | Florent Chuffart | # my_read.cvs = function( |
74 | 6b5a528c | Florent Chuffart | # ### my_read function for cvs files. |
75 | 6b5a528c | Florent Chuffart | # obj, ...){ |
76 | 6b5a528c | Florent Chuffart | # return(read.csv(file=obj$filename, ...)) |
77 | 6b5a528c | Florent Chuffart | # } |
78 | 6b5a528c | Florent Chuffart | |
79 | 6b5a528c | Florent Chuffart | |
80 | 6b5a528c | Florent Chuffart | |
81 | 6b5a528c | Florent Chuffart | |
82 | 6b5a528c | Florent Chuffart | |
83 | 6b5a528c | Florent Chuffart | |
84 | 6b5a528c | Florent Chuffart | |
85 | 6b5a528c | Florent Chuffart | |
86 | 6b5a528c | Florent Chuffart | |
87 | 6b5a528c | Florent Chuffart | FDR = structure(function# False Discovery Rate |
88 | 6b5a528c | Florent Chuffart | ### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper |
89 | 6b5a528c | Florent Chuffart | ##author<< Gael Yvert, |
90 | 6b5a528c | Florent Chuffart | ( |
91 | 6b5a528c | Florent Chuffart | x, ##<< A vector x of independent p-values. |
92 | 6b5a528c | Florent Chuffart | FDR ##<< The specified FDR. |
93 | 6b5a528c | Florent Chuffart | ) { |
94 | 6b5a528c | Florent Chuffart | x <- sort(na.omit(x)) |
95 | 6b5a528c | Florent Chuffart | N = length(x) |
96 | 6b5a528c | Florent Chuffart | i = 1; |
97 | 6b5a528c | Florent Chuffart | while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR |
98 | 6b5a528c | Florent Chuffart | if (i == 1) |
99 | 6b5a528c | Florent Chuffart | return (NA) |
100 | 6b5a528c | Florent Chuffart | else |
101 | 6b5a528c | Florent Chuffart | return( x[i-1] ) |
102 | 6b5a528c | Florent Chuffart | ### Return the the corresponding cutoff. |
103 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
104 | 6b5a528c | Florent Chuffart | print("example") |
105 | 6b5a528c | Florent Chuffart | }) |
106 | 6b5a528c | Florent Chuffart | |
107 | 6b5a528c | Florent Chuffart | |
108 | 6b5a528c | Florent Chuffart | |
109 | 6b5a528c | Florent Chuffart | lod_score_vecs = structure(function # Likelihood ratio |
110 | 6b5a528c | Florent Chuffart | ### Compute the likelihood log of two set of value from two models Vs. a unique model. |
111 | 6b5a528c | Florent Chuffart | ( |
112 | 6b5a528c | Florent Chuffart | x ,##<< First vector. |
113 | 6b5a528c | Florent Chuffart | y ##<< Second vector. |
114 | 6b5a528c | Florent Chuffart | ) { |
115 | 6b5a528c | Florent Chuffart | if (length(x) <=1 | length(y) <= 1) { |
116 | 6b5a528c | Florent Chuffart | return(NA) |
117 | 6b5a528c | Florent Chuffart | } |
118 | 6b5a528c | Florent Chuffart | meanX = mean(x) |
119 | 6b5a528c | Florent Chuffart | sdX = sd(x) |
120 | 6b5a528c | Florent Chuffart | meanY = mean(y) |
121 | 6b5a528c | Florent Chuffart | sdY = sd(y) |
122 | 6b5a528c | Florent Chuffart | meanXY = mean(c(x,y)) |
123 | 6b5a528c | Florent Chuffart | sdXY = sd(c(x,y)) |
124 | 6b5a528c | Florent Chuffart | llX = sum(log(dnorm(x,mean=meanX,sd=sdX))) |
125 | 6b5a528c | Florent Chuffart | llY = sum(log(dnorm(y,mean=meanY,sd=sdY))) |
126 | 6b5a528c | Florent Chuffart | llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY))) |
127 | 729c934e | Florent Chuffart | ratio = llX + llY - llXY |
128 | 6b5a528c | Florent Chuffart | return(ratio) |
129 | 6b5a528c | Florent Chuffart | ### Returns the likelihood ratio. |
130 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
131 | 6b5a528c | Florent Chuffart | # LOD score for 2 set of values |
132 | 6b5a528c | Florent Chuffart | mean1=5; sd1=2; card2 = 250 |
133 | 6b5a528c | Florent Chuffart | mean2=6; sd2=3; card1 = 200 |
134 | 6b5a528c | Florent Chuffart | x1 = rnorm(card1, mean1, sd1) |
135 | 6b5a528c | Florent Chuffart | x2 = rnorm(card2, mean2, sd2) |
136 | 6b5a528c | Florent Chuffart | min = floor(min(c(x1,x2))) |
137 | 6b5a528c | Florent Chuffart | max = ceiling(max(c(x1,x2))) |
138 | 6b5a528c | Florent Chuffart | hist(c(x1,x2), xlim=c(min, max), breaks=min:max) |
139 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2) |
140 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3) |
141 | 6b5a528c | Florent Chuffart | lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4) |
142 | 6b5a528c | Florent Chuffart | lod_score_vecs(x1,x2) |
143 | 6b5a528c | Florent Chuffart | }) |
144 | 6b5a528c | Florent Chuffart | |
145 | 6b5a528c | Florent Chuffart | dfadd = structure(function# Adding list to a dataframe. |
146 | 6b5a528c | Florent Chuffart | ### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}. |
147 | 6b5a528c | Florent Chuffart | (df, ##<< A dataframe |
148 | 6b5a528c | Florent Chuffart | l ##<< A list |
149 | 6b5a528c | Florent Chuffart | ) { |
150 | 6b5a528c | Florent Chuffart | if (is.null(df)) { |
151 | 6b5a528c | Florent Chuffart | df = data.frame(l,stringsAsFactors=FALSE) |
152 | 6b5a528c | Florent Chuffart | } else { |
153 | 6b5a528c | Florent Chuffart | df = rbind(df, data.frame(l,stringsAsFactors=FALSE)) |
154 | 6b5a528c | Florent Chuffart | } |
155 | 6b5a528c | Florent Chuffart | return(df) |
156 | 6b5a528c | Florent Chuffart | ### Return the dataframe \emph{df}. |
157 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
158 | 6b5a528c | Florent Chuffart | ## Here dataframe is NULL |
159 | 6b5a528c | Florent Chuffart | print(df) |
160 | 6b5a528c | Florent Chuffart | df = NULL |
161 | 6b5a528c | Florent Chuffart | |
162 | 6b5a528c | Florent Chuffart | # Initialize df |
163 | 6b5a528c | Florent Chuffart | df = dfadd(df, list(key1 = "value1", key2 = "value2")) |
164 | 6b5a528c | Florent Chuffart | print(df) |
165 | 6b5a528c | Florent Chuffart | |
166 | 6b5a528c | Florent Chuffart | # Adding elements to df |
167 | 6b5a528c | Florent Chuffart | df = dfadd(df, list(key1 = "value1'", key2 = "value2'")) |
168 | 6b5a528c | Florent Chuffart | print(df) |
169 | 6b5a528c | Florent Chuffart | }) |
170 | 6b5a528c | Florent Chuffart | |
171 | 6b5a528c | Florent Chuffart | |
172 | 6b5a528c | Florent Chuffart | sign_from_strand = function( |
173 | 6b5a528c | Florent Chuffart | ### Get the sign of strand |
174 | 6b5a528c | Florent Chuffart | strands) { |
175 | 6b5a528c | Florent Chuffart | apply(t(strands), 2, function(strand) { if (strand == "F") return(1) else return(-1)}) |
176 | 6b5a528c | Florent Chuffart | ### If strand in forward then returns 1 else returns -1 |
177 | 6b5a528c | Florent Chuffart | } |
178 | 6b5a528c | Florent Chuffart | |
179 | 6b5a528c | Florent Chuffart | flat_reads = function( |
180 | 6b5a528c | Florent Chuffart | ### Extract reads coordinates from TempleteFilter input sequence |
181 | 6b5a528c | Florent Chuffart | reads, ##<< TemplateFilter input reads |
182 | 6b5a528c | Florent Chuffart | nuc_width ##<< Width used to shift F and R reads. |
183 | 6b5a528c | Florent Chuffart | ) { |
184 | 6b5a528c | Florent Chuffart | F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])})) |
185 | 6b5a528c | Florent Chuffart | R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])})) |
186 | 6b5a528c | Florent Chuffart | flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads)) ) |
187 | 6b5a528c | Florent Chuffart | return(list(F_flatted_reads, R_flatted_reads, flatted_reads)) |
188 | 6b5a528c | Florent Chuffart | ### Returns a list of F reads, R reads and joint/shifted F and R reads. |
189 | 6b5a528c | Florent Chuffart | } |
190 | 6b5a528c | Florent Chuffart | |
191 | 6b5a528c | Florent Chuffart | |
192 | 6b5a528c | Florent Chuffart | |
193 | 6b5a528c | Florent Chuffart | filter_tf_outputs = function(# Filter TemplateFilter outputs |
194 | 6b5a528c | Florent Chuffart | ### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlap threshold. |
195 | 6b5a528c | Florent Chuffart | tf_outputs, ##<< TemplateFilter outputs. |
196 | 6b5a528c | Florent Chuffart | chr, ##<< Chromosome observed, here chr is an integer. |
197 | 6b5a528c | Florent Chuffart | x_min, ##<< Coordinate of the first bp observed. |
198 | 6b5a528c | Florent Chuffart | x_max, ##<< Coordinate of the last bp observed. |
199 | 6b5a528c | Florent Chuffart | nuc_width = 160, ##<< Nucleosome width. |
200 | 6b5a528c | Florent Chuffart | ol_bp = 59, ##<< Overlap Threshold. |
201 | 6b5a528c | Florent Chuffart | corr_thres = 0.5 ##<< Correlation threshold. |
202 | 6b5a528c | Florent Chuffart | ) { |
203 | 6b5a528c | Florent Chuffart | if (x_min < 0) { |
204 | 6b5a528c | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (-x_max - nuc_width/2) & tf_outputs$center < (-x_min + nuc_width/2),] |
205 | 6b5a528c | Florent Chuffart | } else { |
206 | 6b5a528c | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center > (x_min - nuc_width/2) & tf_outputs$center < (x_max + nuc_width/2),] |
207 | 6b5a528c | Florent Chuffart | } |
208 | 6b5a528c | Florent Chuffart | tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2 |
209 | 6b5a528c | Florent Chuffart | tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2 |
210 | 6b5a528c | Florent Chuffart | tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,] |
211 | 6b5a528c | Florent Chuffart | tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),] |
212 | 6b5a528c | Florent Chuffart | i = 1 |
213 | 6b5a528c | Florent Chuffart | while (i <= length(tf_outputs[,1])) { |
214 | 6b5a528c | Florent Chuffart | lb = tf_outputs[i,]$low |
215 | 6b5a528c | Florent Chuffart | ub = tf_outputs[i,]$up |
216 | 6b5a528c | Florent Chuffart | tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),] |
217 | 6b5a528c | Florent Chuffart | i = i+1 |
218 | 6b5a528c | Florent Chuffart | } |
219 | 6b5a528c | Florent Chuffart | return(tf_outputs) |
220 | 6b5a528c | Florent Chuffart | ### Returns filtered TemplateFilter Outputs |
221 | 6b5a528c | Florent Chuffart | } |
222 | 6b5a528c | Florent Chuffart | |
223 | 6b5a528c | Florent Chuffart | |
224 | 6b5a528c | Florent Chuffart | |
225 | 6b5a528c | Florent Chuffart | filter_tf_inputs = function(# Filter TemplateFilter inputs |
226 | 6b5a528c | Florent Chuffart | ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads. |
227 | 6b5a528c | Florent Chuffart | inputs, ##<< TF inputs to be filtered. |
228 | 6b5a528c | Florent Chuffart | chr, ##<< Chromosome observed, here chr is an integer. |
229 | 6b5a528c | Florent Chuffart | x_min, ##<< Coordinate of the first bp observed. |
230 | 6b5a528c | Florent Chuffart | x_max, ##<< Coordinate of the last bp observed. |
231 | 6b5a528c | Florent Chuffart | nuc_width = 160, ##<< Nucleosome width. |
232 | 6b5a528c | Florent Chuffart | only_f = FALSE, ##<< Filter only F reads. |
233 | b8a95426 | Florent Chuffart | only_r = FALSE, ##<< Filter only R reads. |
234 | b8a95426 | Florent Chuffart | filter_for_coverage = FALSE ##<< Does it filter for plot coverage? |
235 | 6b5a528c | Florent Chuffart | ) { |
236 | 6b5a528c | Florent Chuffart | if (only_f) { |
237 | 6b5a528c | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,] |
238 | 6b5a528c | Florent Chuffart | } else if (only_r) { |
239 | 6b5a528c | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,] |
240 | 6b5a528c | Florent Chuffart | } else { |
241 | 6b5a528c | Florent Chuffart | inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,] |
242 | 6b5a528c | Florent Chuffart | } |
243 | b8a95426 | Florent Chuffart | if (!filter_for_coverage) { |
244 | b8a95426 | Florent Chuffart | corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3]) |
245 | b8a95426 | Florent Chuffart | inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,] |
246 | b8a95426 | Florent Chuffart | } |
247 | 6b5a528c | Florent Chuffart | return(inputs) |
248 | 6b5a528c | Florent Chuffart | ### Returns filtred inputs. |
249 | 6b5a528c | Florent Chuffart | } |
250 | 6b5a528c | Florent Chuffart | |
251 | 6b5a528c | Florent Chuffart | |
252 | 6b5a528c | Florent Chuffart | |
253 | 6b5a528c | Florent Chuffart | get_comp_strand = function( |
254 | 6b5a528c | Florent Chuffart | ### Compute the complementatry strand. |
255 | 6b5a528c | Florent Chuffart | strand ##<< The original strand. |
256 | 6b5a528c | Florent Chuffart | ) { |
257 | 6b5a528c | Florent Chuffart | apply(t(strand),2, function(n){ |
258 | 6b5a528c | Florent Chuffart | if (n=="a") {return("t")} |
259 | 6b5a528c | Florent Chuffart | if (n=="t") {return("a")} |
260 | 6b5a528c | Florent Chuffart | if (n=="c") {return("g")} |
261 | 6b5a528c | Florent Chuffart | if (n=="g") {return("c")} |
262 | 6b5a528c | Florent Chuffart | }) |
263 | 6b5a528c | Florent Chuffart | ### Returns the complementatry strand. |
264 | 6b5a528c | Florent Chuffart | } |
265 | 6b5a528c | Florent Chuffart | |
266 | 6b5a528c | Florent Chuffart | |
267 | 6b5a528c | Florent Chuffart | aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. |
268 | 6b5a528c | Florent Chuffart | ### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample. |
269 | 6b5a528c | Florent Chuffart | samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}. |
270 | 729c934e | Florent Chuffart | lod_thres=20, ##<< Log likelihood ration threshold. |
271 | 6b5a528c | Florent Chuffart | coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound. |
272 | 6b5a528c | Florent Chuffart | ){ |
273 | 6b5a528c | Florent Chuffart | end_of_tracks = function(tracks) { |
274 | 6b5a528c | Florent Chuffart | if (length(tracks) == 0) { |
275 | 6b5a528c | Florent Chuffart | return(TRUE) |
276 | 6b5a528c | Florent Chuffart | } |
277 | 6b5a528c | Florent Chuffart | for (lower_bound in tracks) { |
278 | 6b5a528c | Florent Chuffart | if(!is.na(lower_bound)) { |
279 | 6b5a528c | Florent Chuffart | if (lower_bound < coord_max) { |
280 | 6b5a528c | Florent Chuffart | return(FALSE) |
281 | 6b5a528c | Florent Chuffart | } |
282 | 6b5a528c | Florent Chuffart | } |
283 | 6b5a528c | Florent Chuffart | } |
284 | 6b5a528c | Florent Chuffart | return(TRUE) |
285 | 6b5a528c | Florent Chuffart | } |
286 | 6b5a528c | Florent Chuffart | store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) { |
287 | 6b5a528c | Florent Chuffart | if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) { |
288 | 6b5a528c | Florent Chuffart | new_cluster$wp = TRUE |
289 | 6b5a528c | Florent Chuffart | center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2 |
290 | 6b5a528c | Florent Chuffart | if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) { |
291 | 6b5a528c | Florent Chuffart | clusters[[length(clusters) + 1]] = new_cluster |
292 | 6b5a528c | Florent Chuffart | # print(new_cluster) |
293 | 6b5a528c | Florent Chuffart | } |
294 | 6b5a528c | Florent Chuffart | } else { |
295 | 6b5a528c | Florent Chuffart | new_cluster$wp = FALSE |
296 | 6b5a528c | Florent Chuffart | center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2 |
297 | 6b5a528c | Florent Chuffart | if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) { |
298 | 6b5a528c | Florent Chuffart | clusters[[length(clusters) + 1]] = new_cluster |
299 | 6b5a528c | Florent Chuffart | } |
300 | 6b5a528c | Florent Chuffart | } |
301 | 6b5a528c | Florent Chuffart | return(clusters) |
302 | 6b5a528c | Florent Chuffart | } |
303 | 6b5a528c | Florent Chuffart | strain = samples[[1]]$strain |
304 | 6b5a528c | Florent Chuffart | lod_scores = c() |
305 | 6b5a528c | Florent Chuffart | min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end) |
306 | 6b5a528c | Florent Chuffart | max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end) |
307 | 6b5a528c | Florent Chuffart | # compute clusters |
308 | 6b5a528c | Florent Chuffart | clusters = list() |
309 | 6b5a528c | Florent Chuffart | cluster_contents = list() |
310 | 6b5a528c | Florent Chuffart | # Init reader |
311 | 6b5a528c | Florent Chuffart | indexes = c() |
312 | 6b5a528c | Florent Chuffart | track_readers = c() |
313 | 6b5a528c | Florent Chuffart | current_nuc = NULL |
314 | 729c934e | Florent Chuffart | lod_score = lod_thres + 1 |
315 | 6b5a528c | Florent Chuffart | # Read nucs from TF outputs |
316 | 6b5a528c | Florent Chuffart | tf_outs = list() |
317 | 6b5a528c | Florent Chuffart | i = 1 |
318 | 6b5a528c | Florent Chuffart | for (sample in samples) { |
319 | 6b5a528c | Florent Chuffart | # print(sample$roi$chr) |
320 | 6b5a528c | Florent Chuffart | # print(min_nuc_center) |
321 | 6b5a528c | Florent Chuffart | # print(max_nuc_center) |
322 | 6b5a528c | Florent Chuffart | # print(sample$outputs) |
323 | 6b5a528c | Florent Chuffart | # tf_outs[[i]] = filter_tf_outputs(sample$outputs, sample$roi$chr, min_nuc_center, max_nuc_center) |
324 | 6b5a528c | Florent Chuffart | # print(tf_outs[[i]]) |
325 | 6b5a528c | Florent Chuffart | tf_outs[[i]] = sample$outputs |
326 | 6b5a528c | Florent Chuffart | tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),] |
327 | 6b5a528c | Florent Chuffart | indexes[i] = 1 |
328 | 6b5a528c | Florent Chuffart | if (is.na(tf_outs[[i]][indexes[i],]$center)) { |
329 | 6b5a528c | Florent Chuffart | track_readers[i] = coord_max |
330 | 6b5a528c | Florent Chuffart | } else { |
331 | 6b5a528c | Florent Chuffart | track_readers[i] = tf_outs[[i]][indexes[i],]$center |
332 | 6b5a528c | Florent Chuffart | } |
333 | 6b5a528c | Florent Chuffart | i = i+1 |
334 | 6b5a528c | Florent Chuffart | } |
335 | 6b5a528c | Florent Chuffart | # print(track_readers) |
336 | 6b5a528c | Florent Chuffart | new_cluster = NULL |
337 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = 0 |
338 | 6b5a528c | Florent Chuffart | nuc_from_track = c() |
339 | 6b5a528c | Florent Chuffart | for (i in 1:length(tf_outs)){ |
340 | 6b5a528c | Florent Chuffart | nuc_from_track[i] = FALSE |
341 | 6b5a528c | Florent Chuffart | } |
342 | 6b5a528c | Florent Chuffart | # Start clustering |
343 | 6b5a528c | Florent Chuffart | while (!end_of_tracks(track_readers)) { |
344 | 6b5a528c | Florent Chuffart | new_center = min(track_readers) |
345 | 6b5a528c | Florent Chuffart | current_track = which(track_readers == new_center)[1] |
346 | 6b5a528c | Florent Chuffart | new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],]) |
347 | 6b5a528c | Florent Chuffart | new_nuc$chr = substr(new_nuc$chr,4,1000000L) |
348 | 6b5a528c | Florent Chuffart | new_nuc$inputs = samples[[current_track]]$inputs |
349 | 6b5a528c | Florent Chuffart | new_nuc$chr = samples[[current_track]]$roi$chr |
350 | 6b5a528c | Florent Chuffart | new_nuc$track = current_track |
351 | 6b5a528c | Florent Chuffart | |
352 | 6b5a528c | Florent Chuffart | new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width) |
353 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width) |
354 | 6b5a528c | Florent Chuffart | new_nuc$original_reads = flatted_reads[[3]] |
355 | 6b5a528c | Florent Chuffart | |
356 | 6b5a528c | Florent Chuffart | new_upper_bound = new_nuc$upper_bound |
357 | 6b5a528c | Florent Chuffart | |
358 | 6b5a528c | Florent Chuffart | if (!is.null(current_nuc)) { |
359 | 6b5a528c | Florent Chuffart | lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads) |
360 | 6b5a528c | Florent Chuffart | lod_scores = c(lod_scores,lod_score) |
361 | 6b5a528c | Florent Chuffart | } |
362 | 6b5a528c | Florent Chuffart | # print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" ")) |
363 | 6b5a528c | Florent Chuffart | if (is.na(lod_score)) { |
364 | 729c934e | Florent Chuffart | lod_score = lod_thres + 1 |
365 | 6b5a528c | Florent Chuffart | } |
366 | 729c934e | Florent Chuffart | # Store lod_score |
367 | 729c934e | Florent Chuffart | new_nuc$lod_score = lod_score |
368 | 729c934e | Florent Chuffart | if (lod_score < lod_thres) { |
369 | 6b5a528c | Florent Chuffart | # aggregate to current cluster |
370 | 6b5a528c | Florent Chuffart | # update bound |
371 | 6b5a528c | Florent Chuffart | if (new_nuc$upper_bound > new_cluster$upper_bound) { |
372 | 6b5a528c | Florent Chuffart | new_cluster$upper_bound = new_nuc$upper_bound |
373 | 6b5a528c | Florent Chuffart | } |
374 | 6b5a528c | Florent Chuffart | if (new_nuc$lower_bound < new_cluster$lower_bound) { |
375 | 6b5a528c | Florent Chuffart | new_cluster$lower_bound = new_nuc$lower_bound |
376 | 6b5a528c | Florent Chuffart | } |
377 | 6b5a528c | Florent Chuffart | # add nucleosome to current cluster |
378 | 6b5a528c | Florent Chuffart | nuc_from_track[current_track] = TRUE |
379 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = nb_nucs_in_cluster + 1 |
380 | 6b5a528c | Florent Chuffart | new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc |
381 | 6b5a528c | Florent Chuffart | } else { |
382 | 6b5a528c | Florent Chuffart | if (!is.null(new_cluster)) { |
383 | 6b5a528c | Florent Chuffart | # store old cluster |
384 | 6b5a528c | Florent Chuffart | clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center) |
385 | 6b5a528c | Florent Chuffart | } |
386 | 6b5a528c | Florent Chuffart | # Reinit current cluster composition stuff |
387 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = 0 |
388 | 6b5a528c | Florent Chuffart | nuc_from_track = c() |
389 | 6b5a528c | Florent Chuffart | for (i in 1:length(tf_outs)){ |
390 | 6b5a528c | Florent Chuffart | nuc_from_track[i] = FALSE |
391 | 6b5a528c | Florent Chuffart | } |
392 | 6b5a528c | Florent Chuffart | # create new cluster |
393 | 6b5a528c | Florent Chuffart | new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list()) |
394 | 6b5a528c | Florent Chuffart | # update upper bound |
395 | 6b5a528c | Florent Chuffart | current_upper_bound = new_upper_bound |
396 | 6b5a528c | Florent Chuffart | # add nucleosome to current cluster |
397 | 6b5a528c | Florent Chuffart | nb_nucs_in_cluster = nb_nucs_in_cluster + 1 |
398 | 6b5a528c | Florent Chuffart | nuc_from_track[current_track] = TRUE |
399 | 6b5a528c | Florent Chuffart | new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc |
400 | 6b5a528c | Florent Chuffart | |
401 | 6b5a528c | Florent Chuffart | } |
402 | 6b5a528c | Florent Chuffart | |
403 | 6b5a528c | Florent Chuffart | current_nuc = new_nuc |
404 | 6b5a528c | Florent Chuffart | |
405 | 6b5a528c | Florent Chuffart | # update indexes |
406 | 6b5a528c | Florent Chuffart | if (indexes[current_track] < length(tf_outs[[current_track]]$center)) { |
407 | 6b5a528c | Florent Chuffart | indexes[current_track] = indexes[current_track] + 1 |
408 | 6b5a528c | Florent Chuffart | # update track |
409 | 6b5a528c | Florent Chuffart | track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center |
410 | 6b5a528c | Florent Chuffart | } else { |
411 | 6b5a528c | Florent Chuffart | # update track |
412 | 6b5a528c | Florent Chuffart | track_readers[current_track] = coord_max |
413 | 6b5a528c | Florent Chuffart | } |
414 | 6b5a528c | Florent Chuffart | } |
415 | 6b5a528c | Florent Chuffart | # store last cluster |
416 | 6b5a528c | Florent Chuffart | if (!is.null(new_cluster)) { |
417 | 6b5a528c | Florent Chuffart | # store old cluster |
418 | 6b5a528c | Florent Chuffart | clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center) |
419 | 6b5a528c | Florent Chuffart | } |
420 | 6b5a528c | Florent Chuffart | return(list(clusters, lod_scores)) |
421 | 6b5a528c | Florent Chuffart | ### Returns a list of clusterized nucleosomes, and all computed lod scores. |
422 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
423 | 6b5a528c | Florent Chuffart | # Dealing with a region of interest |
424 | 6b5a528c | Florent Chuffart | roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301)) |
425 | 6b5a528c | Florent Chuffart | samples = list() |
426 | 6b5a528c | Florent Chuffart | for (i in 1:3) { |
427 | 6b5a528c | Florent Chuffart | # Create TF output |
428 | 6b5a528c | Florent Chuffart | tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9) |
429 | 6b5a528c | Florent Chuffart | outputs = dfadd(NULL,tf_nuc) |
430 | 6b5a528c | Florent Chuffart | outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end) |
431 | 6b5a528c | Florent Chuffart | # Generate corresponding reads |
432 | 6b5a528c | Florent Chuffart | nb_reads = round(runif(1,170,230)) |
433 | 6b5a528c | Florent Chuffart | reads = round(rnorm(nb_reads, tf_nuc$center,20)) |
434 | 6b5a528c | Florent Chuffart | u_reads = sort(unique(reads)) |
435 | 6b5a528c | Florent Chuffart | strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2)))) |
436 | 6b5a528c | Florent Chuffart | counts = apply(t(u_reads), 2, function(r) { sum(reads == r)}) |
437 | 6b5a528c | Florent Chuffart | shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)}) |
438 | 6b5a528c | Florent Chuffart | u_reads = u_reads + shifts |
439 | 6b5a528c | Florent Chuffart | inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), |
440 | 6b5a528c | Florent Chuffart | "V2" = u_reads, |
441 | 6b5a528c | Florent Chuffart | "V3" = strands, |
442 | 6b5a528c | Florent Chuffart | "V4" = counts), stringsAsFactors=FALSE) |
443 | 6b5a528c | Florent Chuffart | samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs) |
444 | 6b5a528c | Florent Chuffart | } |
445 | 6b5a528c | Florent Chuffart | print(aggregate_intra_strain_nucs(samples)) |
446 | 6b5a528c | Florent Chuffart | }) |
447 | 6b5a528c | Florent Chuffart | |
448 | 7646593d | Florent Chuffart | flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output |
449 | 7646593d | Florent Chuffart | ### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function. |
450 | 7646593d | Florent Chuffart | partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function |
451 | 7646593d | Florent Chuffart | roi_index ##<< the index of the roi involved |
452 | 7646593d | Florent Chuffart | ) { |
453 | 7646593d | Florent Chuffart | if (length(partial_strain_maps) == 0 ){ |
454 | 7646593d | Florent Chuffart | print(paste("Empty partial_strain_maps for roi", roi_index, "ands strain", strain, "." )) |
455 | 7646593d | Florent Chuffart | tmp_strain_maps = list() |
456 | 7646593d | Florent Chuffart | } else { |
457 | 7646593d | Florent Chuffart | tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){ |
458 | 7646593d | Florent Chuffart | tmp_nuc = partial_strain_maps[[i]] |
459 | 7646593d | Florent Chuffart | tmp_nuc_as_list = list() |
460 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]] |
461 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]]) |
462 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]]) |
463 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["roi_index"]] = roi_index |
464 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["index_nuc"]] = i |
465 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp) |
466 | 7646593d | Florent Chuffart | all_original_reads = c() |
467 | 7646593d | Florent Chuffart | for (j in 1:length(tmp_nuc$nucs)) { |
468 | 7646593d | Florent Chuffart | all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads) |
469 | 7646593d | Florent Chuffart | } |
470 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads) |
471 | 7646593d | Florent Chuffart | if (tmp_nuc$wp) { |
472 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5) |
473 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5) |
474 | 7646593d | Florent Chuffart | } else { |
475 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lod_1"]] = NA |
476 | 7646593d | Florent Chuffart | tmp_nuc_as_list[["lod_2"]] = NA |
477 | 7646593d | Florent Chuffart | } |
478 | 7646593d | Florent Chuffart | return(tmp_nuc_as_list) |
479 | 7646593d | Florent Chuffart | }) |
480 | 7646593d | Florent Chuffart | tmp_strain_maps = do.call("rbind", tmp_strain_map) |
481 | 7646593d | Florent Chuffart | } |
482 | 7646593d | Florent Chuffart | return(data.frame(tmp_strain_maps)) |
483 | 7646593d | Florent Chuffart | ### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function. |
484 | 7646593d | Florent Chuffart | } |
485 | 6b5a528c | Florent Chuffart | |
486 | 6b5a528c | Florent Chuffart | align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains. |
487 | 6b5a528c | Florent Chuffart | ### This function aligns nucs between two strains for a given genome region. |
488 | 6b5a528c | Florent Chuffart | replicates, ##<< Set of replicates, ideally 3 per strain. |
489 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed. |
490 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed. |
491 | 6b5a528c | Florent Chuffart | corr_thres=0.5, ##<< Correlation threshold. |
492 | 729c934e | Florent Chuffart | lod_thres=100, ##<< LOD cut off. |
493 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
494 | 6b5a528c | Florent Chuffart | ... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed. |
495 | 6b5a528c | Florent Chuffart | ) { |
496 | 6b5a528c | Florent Chuffart | if (length(replicates) < 2) { |
497 | 6b5a528c | Florent Chuffart | stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.") |
498 | 6b5a528c | Florent Chuffart | } else if (length(replicates) > 2) { |
499 | 6b5a528c | Florent Chuffart | print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.") |
500 | 6b5a528c | Florent Chuffart | } |
501 | 6b5a528c | Florent Chuffart | common_nuc = NULL |
502 | 6b5a528c | Florent Chuffart | lod_scores = c() |
503 | 6b5a528c | Florent Chuffart | chr = replicates[[1]][[1]]$roi$chr |
504 | 6b5a528c | Florent Chuffart | min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end) |
505 | 6b5a528c | Florent Chuffart | max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end) |
506 | 6b5a528c | Florent Chuffart | strain_ref1 = replicates[[1]][[1]]$strain |
507 | 6b5a528c | Florent Chuffart | strain_ref2 = replicates[[2]][[1]]$strain |
508 | 6b5a528c | Florent Chuffart | big_roi = replicates[[1]][[1]]$roi |
509 | a0b91fee | Florent Chuffart | orig_big_roi = replicates[[1]][[1]]$orig_roi |
510 | 6b5a528c | Florent Chuffart | if(big_roi$end - big_roi$begin < 0) { |
511 | 6b5a528c | Florent Chuffart | tmp_begin = big_roi$begin |
512 | 6b5a528c | Florent Chuffart | big_roi$begin = big_roi$end |
513 | 6b5a528c | Florent Chuffart | big_roi$end = tmp_begin |
514 | 6b5a528c | Florent Chuffart | } |
515 | 6b5a528c | Florent Chuffart | # GO! |
516 | 6b5a528c | Florent Chuffart | if (is.null(wp_nucs_strain_ref1)) { |
517 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]] |
518 | 6b5a528c | Florent Chuffart | } |
519 | 6b5a528c | Florent Chuffart | if (is.null(wp_nucs_strain_ref2)) { |
520 | 6b5a528c | Florent Chuffart | wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]] |
521 | 6b5a528c | Florent Chuffart | } |
522 | 1d833b97 | Florent Chuffart | # foo <<- wp_nucs_strain_ref1 |
523 | 1d833b97 | Florent Chuffart | # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)})) |
524 | 1d833b97 | Florent Chuffart | # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)})) |
525 | 6b5a528c | Florent Chuffart | # dealing with matching_nas |
526 | 6b5a528c | Florent Chuffart | lws = c() |
527 | 6b5a528c | Florent Chuffart | ups = c() |
528 | 6b5a528c | Florent Chuffart | for (na in wp_nucs_strain_ref2) { |
529 | 6b5a528c | Florent Chuffart | lws = c(lws, na$lower_bound) |
530 | 6b5a528c | Florent Chuffart | ups = c(ups, na$upper_bound) |
531 | 6b5a528c | Florent Chuffart | } |
532 | 6b5a528c | Florent Chuffart | |
533 | 6b5a528c | Florent Chuffart | print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep="")) |
534 | 6b5a528c | Florent Chuffart | roi_strain_ref1 = NULL |
535 | 6b5a528c | Florent Chuffart | roi_strain_ref2 = NULL |
536 | 6b5a528c | Florent Chuffart | if (length(wp_nucs_strain_ref1) > 0) { |
537 | 6b5a528c | Florent Chuffart | for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){ |
538 | 6b5a528c | Florent Chuffart | # print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep="")) |
539 | 6b5a528c | Florent Chuffart | nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]] |
540 | 6b5a528c | Florent Chuffart | # Filtering on Well Positionned |
541 | 883439d6 | Florent Chuffart | if (nuc_strain_ref1$wp) { |
542 | 6b5a528c | Florent Chuffart | roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1) |
543 | a0b91fee | Florent Chuffart | roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi=orig_big_roi, config=config) |
544 | 1d833b97 | Florent Chuffart | if (!is.null(roi_strain_ref2)){ |
545 | 6b5a528c | Florent Chuffart | # LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER) |
546 | 6b5a528c | Florent Chuffart | lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin) |
547 | 6b5a528c | Florent Chuffart | upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin) |
548 | 6b5a528c | Florent Chuffart | matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2) |
549 | 6b5a528c | Florent Chuffart | for (index_nuc_strain_ref2 in matching_nas) { |
550 | 6b5a528c | Florent Chuffart | nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]] |
551 | 6b5a528c | Florent Chuffart | # Filtering on Well Positionned |
552 | 883439d6 | Florent Chuffart | nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2) |
553 | a0b91fee | Florent Chuffart | if (!is.null(translate_roi(nuc_strain_ref2_to_roi, strain_ref1, big_roi=orig_big_roi, config=config)) & |
554 | 1e34bb1f | Florent Chuffart | nuc_strain_ref2$wp) { |
555 | 6b5a528c | Florent Chuffart | # Filtering on correlation Score and collecting reads |
556 | 6b5a528c | Florent Chuffart | SKIP = FALSE |
557 | 6b5a528c | Florent Chuffart | # TODO: This for loop could be done before working on strain_ref2. Isn't it? |
558 | 6b5a528c | Florent Chuffart | reads_strain_ref1 = c() |
559 | 6b5a528c | Florent Chuffart | for (nuc in nuc_strain_ref1$nucs){ |
560 | 6b5a528c | Florent Chuffart | reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads) |
561 | 6b5a528c | Florent Chuffart | if (nuc$corr < corr_thres) { |
562 | 6b5a528c | Florent Chuffart | SKIP = TRUE |
563 | 6b5a528c | Florent Chuffart | } |
564 | 6b5a528c | Florent Chuffart | } |
565 | 6b5a528c | Florent Chuffart | reads_strain_ref2 = c() |
566 | 6b5a528c | Florent Chuffart | for (nuc in nuc_strain_ref2$nucs){ |
567 | 6b5a528c | Florent Chuffart | reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads) |
568 | 6b5a528c | Florent Chuffart | if (nuc$corr < corr_thres) { |
569 | 6b5a528c | Florent Chuffart | SKIP = TRUE |
570 | 6b5a528c | Florent Chuffart | } |
571 | 6b5a528c | Florent Chuffart | } |
572 | 6b5a528c | Florent Chuffart | # Filtering on correlation Score |
573 | 6b5a528c | Florent Chuffart | if (!SKIP) { |
574 | 6b5a528c | Florent Chuffart | # tranlation of reads into strain 2 coords |
575 | 6b5a528c | Florent Chuffart | diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2 |
576 | 6b5a528c | Florent Chuffart | reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1)) |
577 | 6b5a528c | Florent Chuffart | lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2) |
578 | 6b5a528c | Florent Chuffart | lod_scores = c(lod_scores, lod_score) |
579 | 6b5a528c | Florent Chuffart | # Filtering on LOD Score |
580 | 729c934e | Florent Chuffart | if (lod_score < lod_thres) { |
581 | 6b5a528c | Florent Chuffart | tmp_nuc = list() |
582 | 6b5a528c | Florent Chuffart | # strain_ref1 |
583 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr |
584 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound |
585 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound |
586 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5) |
587 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5) |
588 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1) |
589 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1 |
590 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr1_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[1]]$corr,5) |
591 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr2_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[2]]$corr,5) |
592 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr3_", strain_ref1, sep="")]] = signif(nuc_strain_ref1$nucs[[3]]$corr,5) |
593 | 6b5a528c | Florent Chuffart | # strain_ref2 |
594 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr |
595 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound |
596 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound |
597 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5) |
598 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5) |
599 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2) |
600 | 6b5a528c | Florent Chuffart | tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2 |
601 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr1_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[1]]$corr,5) |
602 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5) |
603 | 6b5a528c | Florent Chuffart | # tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5) |
604 | 6b5a528c | Florent Chuffart | # common |
605 | 6b5a528c | Florent Chuffart | tmp_nuc[["lod_score"]] = signif(lod_score,5) |
606 | 6b5a528c | Florent Chuffart | # print(tmp_nuc) |
607 | 6b5a528c | Florent Chuffart | common_nuc = dfadd(common_nuc, tmp_nuc) |
608 | 6b5a528c | Florent Chuffart | } |
609 | 6b5a528c | Florent Chuffart | } |
610 | 6b5a528c | Florent Chuffart | } |
611 | 6b5a528c | Florent Chuffart | } |
612 | 6b5a528c | Florent Chuffart | } else { |
613 | 6b5a528c | Florent Chuffart | print("WARNING! No roi for strain ref 2.") |
614 | 6b5a528c | Florent Chuffart | } |
615 | 6b5a528c | Florent Chuffart | } |
616 | 6b5a528c | Florent Chuffart | } |
617 | 6b5a528c | Florent Chuffart | |
618 | 6b5a528c | Florent Chuffart | if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) { |
619 | 6b5a528c | Florent Chuffart | index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] == common_nuc[,1:3][-1,] ,1,sum) == 3) |
620 | 6b5a528c | Florent Chuffart | to_remove_list = c() |
621 | 6b5a528c | Florent Chuffart | for (i in 1:length(index_redundant)) { |
622 | 6b5a528c | Florent Chuffart | if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) { |
623 | 6b5a528c | Florent Chuffart | to_remove = index_redundant[i] |
624 | 6b5a528c | Florent Chuffart | } else { |
625 | 6b5a528c | Florent Chuffart | to_remove = index_redundant[i] + 1 |
626 | 6b5a528c | Florent Chuffart | } |
627 | 6b5a528c | Florent Chuffart | to_remove_list = c(to_remove_list, to_remove) |
628 | 6b5a528c | Florent Chuffart | } |
629 | 6b5a528c | Florent Chuffart | common_nuc = common_nuc[-to_remove_list,] |
630 | 6b5a528c | Florent Chuffart | } |
631 | 6b5a528c | Florent Chuffart | |
632 | 6b5a528c | Florent Chuffart | if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) { |
633 | 8c87ee64 | Florent Chuffart | index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3) |
634 | 6b5a528c | Florent Chuffart | to_remove_list = c() |
635 | 6b5a528c | Florent Chuffart | for (i in 1:length(index_redundant)) { |
636 | 6b5a528c | Florent Chuffart | if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) { |
637 | 6b5a528c | Florent Chuffart | to_remove = index_redundant[i] |
638 | 6b5a528c | Florent Chuffart | } else { |
639 | 6b5a528c | Florent Chuffart | to_remove = index_redundant[i] + 1 |
640 | 6b5a528c | Florent Chuffart | } |
641 | 6b5a528c | Florent Chuffart | to_remove_list = c(to_remove_list, to_remove) |
642 | 6b5a528c | Florent Chuffart | } |
643 | 6b5a528c | Florent Chuffart | common_nuc = common_nuc[-to_remove_list,] |
644 | 6b5a528c | Florent Chuffart | } |
645 | 6b5a528c | Florent Chuffart | |
646 | 6b5a528c | Florent Chuffart | return(list(common_nuc, lod_scores)) |
647 | 6b5a528c | Florent Chuffart | } else { |
648 | 6b5a528c | Florent Chuffart | print("WARNING, no nucs for strain_ref1.") |
649 | 6b5a528c | Florent Chuffart | return(NULL) |
650 | 6b5a528c | Florent Chuffart | } |
651 | 6b5a528c | Florent Chuffart | ### Returns a list of clusterized nucleosomes, and all computed lod scores. |
652 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
653 | 1d833b97 | Florent Chuffart | |
654 | 1d833b97 | Florent Chuffart | # Define new translate_roi function... |
655 | 1d833b97 | Florent Chuffart | translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) { |
656 | 1d833b97 | Florent Chuffart | return(roi) |
657 | 1d833b97 | Florent Chuffart | } |
658 | 1d833b97 | Florent Chuffart | # Binding it by uncomment follwing lines. |
659 | 1d833b97 | Florent Chuffart | unlockBinding("translate_roi", as.environment("package:nucleominer")) |
660 | 1d833b97 | Florent Chuffart | unlockBinding("translate_roi", getNamespace("nucleominer")) |
661 | 1d833b97 | Florent Chuffart | assign("translate_roi", translate_roi, "package:nucleominer") |
662 | 1d833b97 | Florent Chuffart | assign("translate_roi", translate_roi, getNamespace("nucleominer")) |
663 | 1d833b97 | Florent Chuffart | lockBinding("translate_roi", getNamespace("nucleominer")) |
664 | 1d833b97 | Florent Chuffart | lockBinding("translate_roi", as.environment("package:nucleominer")) |
665 | 1d833b97 | Florent Chuffart | |
666 | 6b5a528c | Florent Chuffart | # Dealing with a region of interest |
667 | 6b5a528c | Florent Chuffart | roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1") |
668 | 6b5a528c | Florent Chuffart | roi2 = translate_roi(roi, roi$strain_ref1) |
669 | 6b5a528c | Florent Chuffart | replicates = list() |
670 | 6b5a528c | Florent Chuffart | for (j in 1:2) { |
671 | 6b5a528c | Florent Chuffart | samples = list() |
672 | 6b5a528c | Florent Chuffart | for (i in 1:3) { |
673 | 6b5a528c | Florent Chuffart | # Create TF output |
674 | 6b5a528c | Florent Chuffart | tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9) |
675 | 6b5a528c | Florent Chuffart | outputs = dfadd(NULL,tf_nuc) |
676 | 6b5a528c | Florent Chuffart | outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end) |
677 | 6b5a528c | Florent Chuffart | # Generate corresponding reads |
678 | 6b5a528c | Florent Chuffart | nb_reads = round(runif(1,170,230)) |
679 | 6b5a528c | Florent Chuffart | reads = round(rnorm(nb_reads, tf_nuc$center,20)) |
680 | 6b5a528c | Florent Chuffart | u_reads = sort(unique(reads)) |
681 | 6b5a528c | Florent Chuffart | strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2)))) |
682 | 6b5a528c | Florent Chuffart | counts = apply(t(u_reads), 2, function(r) { sum(reads == r)}) |
683 | 6b5a528c | Florent Chuffart | shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)}) |
684 | 6b5a528c | Florent Chuffart | u_reads = u_reads + shifts |
685 | 6b5a528c | Florent Chuffart | inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)), |
686 | 6b5a528c | Florent Chuffart | "V2" = u_reads, |
687 | 6b5a528c | Florent Chuffart | "V3" = strands, |
688 | 6b5a528c | Florent Chuffart | "V4" = counts), stringsAsFactors=FALSE) |
689 | 6b5a528c | Florent Chuffart | samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs) |
690 | 6b5a528c | Florent Chuffart | } |
691 | 6b5a528c | Florent Chuffart | replicates[[length(replicates) + 1]] = samples |
692 | 6b5a528c | Florent Chuffart | } |
693 | 6b5a528c | Florent Chuffart | print(align_inter_strain_nucs(replicates)) |
694 | 6b5a528c | Florent Chuffart | }) |
695 | 6b5a528c | Florent Chuffart | |
696 | 6b5a528c | Florent Chuffart | |
697 | 6b5a528c | Florent Chuffart | |
698 | 6b5a528c | Florent Chuffart | |
699 | 6b5a528c | Florent Chuffart | |
700 | 6b5a528c | Florent Chuffart | |
701 | 6b5a528c | Florent Chuffart | |
702 | 6b5a528c | Florent Chuffart | |
703 | 6b5a528c | Florent Chuffart | |
704 | 6b5a528c | Florent Chuffart | |
705 | 6b5a528c | Florent Chuffart | |
706 | 6b5a528c | Florent Chuffart | |
707 | 6b5a528c | Florent Chuffart | |
708 | 6b5a528c | Florent Chuffart | |
709 | 6b5a528c | Florent Chuffart | |
710 | 6b5a528c | Florent Chuffart | |
711 | 6b5a528c | Florent Chuffart | |
712 | 6b5a528c | Florent Chuffart | |
713 | 6b5a528c | Florent Chuffart | |
714 | 6b5a528c | Florent Chuffart | |
715 | 6b5a528c | Florent Chuffart | |
716 | 6b5a528c | Florent Chuffart | |
717 | 6b5a528c | Florent Chuffart | |
718 | 6b5a528c | Florent Chuffart | |
719 | 6b5a528c | Florent Chuffart | |
720 | 6b5a528c | Florent Chuffart | |
721 | 6b5a528c | Florent Chuffart | fetch_mnase_replicates = function(# Prefetch data |
722 | 6b5a528c | Florent Chuffart | ### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates. |
723 | 6b5a528c | Florent Chuffart | strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids. |
724 | 6b5a528c | Florent Chuffart | roi, ##<< Region of interest. |
725 | 6b5a528c | Florent Chuffart | all_samples, ##<< Global list of samples. |
726 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
727 | 6b5a528c | Florent Chuffart | only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking. |
728 | 6b5a528c | Florent Chuffart | get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence. |
729 | 6b5a528c | Florent Chuffart | get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files. |
730 | 6b5a528c | Florent Chuffart | ) { |
731 | 6b5a528c | Florent Chuffart | samples=list() |
732 | 6b5a528c | Florent Chuffart | samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id) |
733 | 6b5a528c | Florent Chuffart | for (i in samples_ids) { |
734 | 6b5a528c | Florent Chuffart | sample = as.list(all_samples[all_samples$id==i,]) |
735 | a0b91fee | Florent Chuffart | sample$orig_roi = roi |
736 | 8e9facd8 | Florent Chuffart | sample$roi = translate_roi(roi, sample$strain, config = config) |
737 | 6b5a528c | Florent Chuffart | if (get_genome) { |
738 | 6b5a528c | Florent Chuffart | # Get Genome |
739 | 1d833b97 | Florent Chuffart | sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
740 | 6b5a528c | Florent Chuffart | } |
741 | 6b5a528c | Florent Chuffart | # Get inputs |
742 | a52f85d7 | Florent Chuffart | sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) |
743 | 6b5a528c | Florent Chuffart | sample$total_reads = sum(sample$inputs[,4]) |
744 | 6b5a528c | Florent Chuffart | if (!only_fetch) { |
745 | 6b5a528c | Florent Chuffart | sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
746 | 6b5a528c | Florent Chuffart | } |
747 | 6b5a528c | Florent Chuffart | # Get TF outputs for Mnase_Seq samples |
748 | 6b5a528c | Florent Chuffart | if (sample$marker == "Mnase_Seq" & get_ouputs) { |
749 | a52f85d7 | Florent Chuffart | sample$outputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), "table", header=TRUE, sep="\t") |
750 | 6b5a528c | Florent Chuffart | if (!only_fetch) { |
751 | 6b5a528c | Florent Chuffart | sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
752 | 6b5a528c | Florent Chuffart | } |
753 | 6b5a528c | Florent Chuffart | } |
754 | 6b5a528c | Florent Chuffart | samples[[length(samples) + 1]] = sample |
755 | 6b5a528c | Florent Chuffart | } |
756 | 6b5a528c | Florent Chuffart | return(samples) |
757 | 6b5a528c | Florent Chuffart | } |
758 | 6b5a528c | Florent Chuffart | |
759 | 6b5a528c | Florent Chuffart | substract_region = function(# Substract to a list of regions an other list of regions that intersect it. |
760 | 6b5a528c | Florent Chuffart | ### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two. |
761 | 6b5a528c | Florent Chuffart | region1, ##<< Original regions. |
762 | 6b5a528c | Florent Chuffart | region2 ##<< Regions to substract. |
763 | 6b5a528c | Florent Chuffart | ) { |
764 | 6b5a528c | Florent Chuffart | rec_substract_region = function(region1, region2) { |
765 | 6b5a528c | Florent Chuffart | non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) { |
766 | 6b5a528c | Florent Chuffart | cur_fuzzy = region1[i,] |
767 | 6b5a528c | Florent Chuffart | inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,] |
768 | 6b5a528c | Florent Chuffart | if (length(inter_wp[,1]) > 0) { |
769 | 6b5a528c | Florent Chuffart | ret = c() |
770 | 6b5a528c | Florent Chuffart | for (j in 1:length(inter_wp[,1])) { |
771 | 6b5a528c | Florent Chuffart | cur_wp = inter_wp[j,] |
772 | 6b5a528c | Florent Chuffart | if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) { |
773 | 6b5a528c | Florent Chuffart | # remove cur_fuzzy |
774 | 6b5a528c | Florent Chuffart | ret = c() |
775 | 6b5a528c | Florent Chuffart | break |
776 | 6b5a528c | Florent Chuffart | } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) { |
777 | 6b5a528c | Florent Chuffart | # crop fuzzy |
778 | 6b5a528c | Florent Chuffart | cur_fuzzy$lower_bound = cur_wp$upper_bound + 1 |
779 | 6b5a528c | Florent Chuffart | ret = cur_fuzzy |
780 | 6b5a528c | Florent Chuffart | } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) { |
781 | 6b5a528c | Florent Chuffart | # crop fuzzy |
782 | 6b5a528c | Florent Chuffart | cur_fuzzy$upper_bound = cur_wp$lower_bound - 1 |
783 | 6b5a528c | Florent Chuffart | ret = cur_fuzzy |
784 | 6b5a528c | Florent Chuffart | } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) { |
785 | 6b5a528c | Florent Chuffart | # split fuzzy |
786 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_1 = cur_fuzzy |
787 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1 |
788 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_2 = cur_fuzzy |
789 | 6b5a528c | Florent Chuffart | tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1 |
790 | 6b5a528c | Florent Chuffart | ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp) |
791 | 6b5a528c | Florent Chuffart | # print(ret) |
792 | 6b5a528c | Florent Chuffart | # ret = cur_fuzzy |
793 | 6b5a528c | Florent Chuffart | break |
794 | 6b5a528c | Florent Chuffart | } else { |
795 | 6b5a528c | Florent Chuffart | stop("WARNING NO ADAPTED CASE!") |
796 | 6b5a528c | Florent Chuffart | } |
797 | 6b5a528c | Florent Chuffart | } |
798 | 6b5a528c | Florent Chuffart | return(ret) |
799 | 6b5a528c | Florent Chuffart | } else { |
800 | 6b5a528c | Florent Chuffart | return(cur_fuzzy) |
801 | 6b5a528c | Florent Chuffart | } |
802 | 6b5a528c | Florent Chuffart | }) |
803 | 6b5a528c | Florent Chuffart | } |
804 | 6b5a528c | Florent Chuffart | non_inter_fuzzy = rec_substract_region(region1, region2) |
805 | 6b5a528c | Florent Chuffart | if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)} |
806 | b20637ed | Florent Chuffart | tmp_ulist = unlist(non_inter_fuzzy) |
807 | b20637ed | Florent Chuffart | tmp_names = names(tmp_ulist)[1:4] |
808 | b20637ed | Florent Chuffart | non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE) |
809 | b20637ed | Florent Chuffart | names(non_inter_fuzzy) = tmp_names |
810 | b20637ed | Florent Chuffart | non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr) |
811 | b20637ed | Florent Chuffart | non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr) |
812 | b20637ed | Florent Chuffart | non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound) |
813 | b20637ed | Florent Chuffart | non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound) |
814 | b20637ed | Florent Chuffart | non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),] |
815 | b20637ed | Florent Chuffart | return(non_inter_fuzzy) |
816 | 6b5a528c | Florent Chuffart | } |
817 | 6b5a528c | Florent Chuffart | |
818 | 6b5a528c | Florent Chuffart | union_regions = function(# Aggregate regions that intersect themnselves. |
819 | 6b5a528c | Florent Chuffart | ### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable. |
820 | 6b5a528c | Florent Chuffart | regions ##<< The Regions to be aggregated |
821 | 6b5a528c | Florent Chuffart | ) { |
822 | 6b5a528c | Florent Chuffart | old_length = length(regions[,1]) |
823 | 6b5a528c | Florent Chuffart | new_length = 0 |
824 | 6b5a528c | Florent Chuffart | |
825 | 6b5a528c | Florent Chuffart | while (old_length != new_length) { |
826 | 6b5a528c | Florent Chuffart | regions = regions[order(regions$lower_bound), ] |
827 | 6b5a528c | Florent Chuffart | regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 0, TRUE) |
828 | 6b5a528c | Florent Chuffart | vec_end_1 = which(regions$stop) |
829 | 6b5a528c | Florent Chuffart | if (length(vec_end_1) == 0) { |
830 | 6b5a528c | Florent Chuffart | vec_end_1 = c(length(regions$stop)) |
831 | 6b5a528c | Florent Chuffart | } |
832 | 6b5a528c | Florent Chuffart | if (vec_end_1[length(vec_end_1)] != length(regions$stop)) { |
833 | 6b5a528c | Florent Chuffart | vec_end_1 = c(vec_end_1, length(regions$stop)) |
834 | 6b5a528c | Florent Chuffart | } |
835 | 6b5a528c | Florent Chuffart | vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1) |
836 | 6b5a528c | Florent Chuffart | union = apply(t(1:length(vec_beg_1)), 2, function(i) { |
837 | 6b5a528c | Florent Chuffart | chr = regions$chr[vec_beg_1[i]] |
838 | 6b5a528c | Florent Chuffart | lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]]) |
839 | 6b5a528c | Florent Chuffart | upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]]) |
840 | 6b5a528c | Florent Chuffart | roi_index = regions$roi_index[vec_beg_1[i]] |
841 | 6b5a528c | Florent Chuffart | data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, roi_index=roi_index)) |
842 | 6b5a528c | Florent Chuffart | }) |
843 | 59ad95ca | Florent Chuffart | union = collapse_regions(union) |
844 | 6b5a528c | Florent Chuffart | old_length = length(regions[,1]) |
845 | 6b5a528c | Florent Chuffart | new_length = length(union[,1]) |
846 | 6b5a528c | Florent Chuffart | regions = union |
847 | 6b5a528c | Florent Chuffart | } |
848 | 6b5a528c | Florent Chuffart | return(union) |
849 | 6b5a528c | Florent Chuffart | } |
850 | 6b5a528c | Florent Chuffart | |
851 | 6b5a528c | Florent Chuffart | remove_aligned_wp = function(# Remove wp nucs from common nucs list. |
852 | 6b5a528c | Florent Chuffart | ### It is based on common wp nucs index on nucs and region. |
853 | 6b5a528c | Florent Chuffart | strain_maps, ##<< Nuc maps. |
854 | 6b5a528c | Florent Chuffart | roi_index, ##<< The region of interest index. |
855 | 6b5a528c | Florent Chuffart | tmp_common_nucs, ##<< the list of wp nucs. |
856 | 6b5a528c | Florent Chuffart | strain##<< The strain to consider. |
857 | 6b5a528c | Florent Chuffart | ){ |
858 | 6b5a528c | Florent Chuffart | fuzzy_nucs = strain_maps[[strain]] |
859 | 6b5a528c | Florent Chuffart | fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$roi_index == roi_index,] |
860 | 6b5a528c | Florent Chuffart | fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),] |
861 | 6b5a528c | Florent Chuffart | if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)} |
862 | 6b5a528c | Florent Chuffart | if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"} |
863 | 6b5a528c | Florent Chuffart | anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]] |
864 | 6b5a528c | Florent Chuffart | fuzzy_nucs = fuzzy_nucs[-anti_index_1,] |
865 | 6b5a528c | Florent Chuffart | return(fuzzy_nucs) |
866 | 6b5a528c | Florent Chuffart | } |
867 | 6b5a528c | Florent Chuffart | |
868 | 6b5a528c | Florent Chuffart | translate_regions = function(# Translate a list of regions from a strain ref to another. |
869 | 6b5a528c | Florent Chuffart | ### This function is an eloborated call to translate_roi. |
870 | 6b5a528c | Florent Chuffart | regions, ##<< Regions to be translated. |
871 | 6b5a528c | Florent Chuffart | combi, ##<< Combination of strains. |
872 | 6b5a528c | Florent Chuffart | roi_index, ##<< The region of interest index. |
873 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
874 | 6b5a528c | Florent Chuffart | roi ##<< The region of interest. |
875 | 6b5a528c | Florent Chuffart | ) { |
876 | 6b5a528c | Florent Chuffart | tr_regions = apply(t(1:length(regions[,1])), 2, function(i) { |
877 | 6b5a528c | Florent Chuffart | tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2]) |
878 | 5a26597a | Florent Chuffart | big_roi = roi |
879 | 5a26597a | Florent Chuffart | trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config = config, big_roi = big_roi) |
880 | 6b5a528c | Florent Chuffart | data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index)) |
881 | 6b5a528c | Florent Chuffart | }) |
882 | 59ad95ca | Florent Chuffart | return(collapse_regions(tr_regions)) |
883 | 59ad95ca | Florent Chuffart | } |
884 | 59ad95ca | Florent Chuffart | |
885 | 59ad95ca | Florent Chuffart | collapse_regions = function(# reformat an "apply manipulated" list of regions |
886 | 59ad95ca | Florent Chuffart | ### Utils to reformat an "apply manipulated" list of regions |
887 | 59ad95ca | Florent Chuffart | regions ##< a list of regions |
888 | 59ad95ca | Florent Chuffart | ) { |
889 | b20637ed | Florent Chuffart | regions = do.call(rbind, regions) |
890 | 59ad95ca | Florent Chuffart | regions$chr = as.character(regions$chr) |
891 | 59ad95ca | Florent Chuffart | regions$chr = as.numeric(regions$chr) |
892 | 59ad95ca | Florent Chuffart | regions$lower_bound = as.numeric(regions$lower_bound) |
893 | 59ad95ca | Florent Chuffart | regions$upper_bound = as.numeric(regions$upper_bound) |
894 | 59ad95ca | Florent Chuffart | regions = regions[order(regions$lower_bound),] |
895 | 59ad95ca | Florent Chuffart | return(regions) |
896 | 6b5a528c | Florent Chuffart | } |
897 | 6b5a528c | Florent Chuffart | |
898 | 6b5a528c | Florent Chuffart | extract_wp = function(# Extract wp nucs from nuc map. |
899 | 6b5a528c | Florent Chuffart | ### Function based on common wp nuc index and roi_index. |
900 | 6b5a528c | Florent Chuffart | strain_maps, ##<< Nuc maps. |
901 | 6b5a528c | Florent Chuffart | roi_index, ##<< The region of interest index. |
902 | 6b5a528c | Florent Chuffart | strain, ##<< The strain to consider. |
903 | 6b5a528c | Florent Chuffart | tmp_common_nucs ##<< the list of wp nucs. |
904 | 6b5a528c | Florent Chuffart | ) { |
905 | 6b5a528c | Florent Chuffart | wp_nucs = apply(t(tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]), 2, function(i) { |
906 | 6b5a528c | Florent Chuffart | tmp_wp_nucs = strain_maps[[strain]] |
907 | 6b5a528c | Florent Chuffart | tmp_wp_nucs = tmp_wp_nucs[tmp_wp_nucs$roi_index == roi_index & tmp_wp_nucs$index_nuc == i,] |
908 | 6b5a528c | Florent Chuffart | return(tmp_wp_nucs) |
909 | 6b5a528c | Florent Chuffart | }) |
910 | 59ad95ca | Florent Chuffart | return(collapse_regions(wp_nucs)) |
911 | 6b5a528c | Florent Chuffart | } |
912 | 6b5a528c | Florent Chuffart | |
913 | 6b5a528c | Florent Chuffart | |
914 | 6b5a528c | Florent Chuffart | |
915 | 6b5a528c | Florent Chuffart | |
916 | 6b5a528c | Florent Chuffart | |
917 | 6b5a528c | Florent Chuffart | crop_fuzzy = function(# Crop bound of regions according to region of interest bound |
918 | 6b5a528c | Florent Chuffart | ### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function. |
919 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs, ##<< the regiuons to be croped. |
920 | 6b5a528c | Florent Chuffart | roi, ##<< The region of interest. |
921 | 1d833b97 | Florent Chuffart | strain, ##<< The strain to consider. |
922 | 1d833b97 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
923 | 6b5a528c | Florent Chuffart | ) { |
924 | 8e9facd8 | Florent Chuffart | tr_roi = translate_roi(roi, strain, config = config) |
925 | 6b5a528c | Florent Chuffart | tr_roi_begin = min(tr_roi$begin, tr_roi$end) |
926 | 6b5a528c | Florent Chuffart | tr_roi_end = max(tr_roi$begin, tr_roi$end) |
927 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) { |
928 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin |
929 | 6b5a528c | Florent Chuffart | } |
930 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) { |
931 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin |
932 | 6b5a528c | Florent Chuffart | } |
933 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) { |
934 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end |
935 | 6b5a528c | Florent Chuffart | } |
936 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) { |
937 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end |
938 | 6b5a528c | Florent Chuffart | } |
939 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,] |
940 | 6b5a528c | Florent Chuffart | return(tmp_fuzzy_nucs) |
941 | 6b5a528c | Florent Chuffart | } |
942 | 6b5a528c | Florent Chuffart | |
943 | 6b5a528c | Florent Chuffart | |
944 | 6b5a528c | Florent Chuffart | get_fuzzy = function(# Compute the fuzzy nucs. |
945 | 6b5a528c | Florent Chuffart | ### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting fuzzy regions. It will be take into account in the count read part og the pipeline. |
946 | 6b5a528c | Florent Chuffart | combi, ##<< The strain combination to consider. |
947 | 6b5a528c | Florent Chuffart | roi, ##<< The region of interest. |
948 | 6b5a528c | Florent Chuffart | roi_index, ##<< The region of interest index. |
949 | 6b5a528c | Florent Chuffart | strain_maps, ##<< Nuc maps. |
950 | 1d833b97 | Florent Chuffart | common_nuc_results, ##<< Common wp nuc maps |
951 | 1d833b97 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
952 | 6b5a528c | Florent Chuffart | ) { |
953 | 6b5a528c | Florent Chuffart | print(roi_index) |
954 | 6b5a528c | Florent Chuffart | PLOT = FALSE |
955 | 6b5a528c | Florent Chuffart | tmp_common_nucs = common_nuc_results[[paste(combi[1], combi[2], sep="_")]] |
956 | 6b5a528c | Florent Chuffart | tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$roi_index == roi_index, ] |
957 | 6b5a528c | Florent Chuffart | |
958 | 6b5a528c | Florent Chuffart | print(paste("Dealing with fuzzy from", combi[1])) |
959 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1]) |
960 | 1d833b97 | Florent Chuffart | tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config) |
961 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)} |
962 | 6b5a528c | Florent Chuffart | agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1) |
963 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) { |
964 | 6b5a528c | Florent Chuffart | lines(c(agg_fuzzy_1[i,]$lower_bound, agg_fuzzy_1[i,]$upper_bound), c(+3.1,+3.1), col=2) |
965 | 6b5a528c | Florent Chuffart | } |
966 | 6b5a528c | Florent Chuffart | |
967 | 6b5a528c | Florent Chuffart | print(paste("Dealing with fuzzy from ", combi[2])) |
968 | 6b5a528c | Florent Chuffart | tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2]) |
969 | 6b5a528c | Florent Chuffart | if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)} |
970 | 6b5a528c | Florent Chuffart | agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2) |
971 | 1d833b97 | Florent Chuffart | agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config) |
972 | ddd6e221 | Florent Chuffart | tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi=roi, config=config) |
973 | 1d833b97 | Florent Chuffart | tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config) |
974 | 6b5a528c | Florent Chuffart | # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2) |
975 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) { |
976 | 6b5a528c | Florent Chuffart | lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2) |
977 | 6b5a528c | Florent Chuffart | } |
978 | 6b5a528c | Florent Chuffart | |
979 | 6b5a528c | Florent Chuffart | print("Dealing with fuzzy from both...") |
980 | 6b5a528c | Florent Chuffart | all_fuzzy = union_regions(rbind(agg_fuzzy_1, tr_agg_fuzzy_2)) |
981 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(all_fuzzy[,1])) { |
982 | 6b5a528c | Florent Chuffart | lines(c(all_fuzzy[i,]$lower_bound, all_fuzzy[i,]$upper_bound), c(+3.2, +3.2), col=1) |
983 | 6b5a528c | Florent Chuffart | } |
984 | 6b5a528c | Florent Chuffart | |
985 | 6b5a528c | Florent Chuffart | print(paste("Dealing with wp from", combi[1])) |
986 | 6b5a528c | Florent Chuffart | wp_nucs_1 = extract_wp(strain_maps, roi_index, combi[1], tmp_common_nucs) |
987 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(wp_nucs_1[,1])) { |
988 | 6b5a528c | Florent Chuffart | lines(c(wp_nucs_1[i,]$lower_bound, wp_nucs_1[i,]$upper_bound), c(+3.5,+3.5), col=3) |
989 | 6b5a528c | Florent Chuffart | } |
990 | 6b5a528c | Florent Chuffart | |
991 | 6b5a528c | Florent Chuffart | print(paste("Dealing with wp from", combi[2])) |
992 | 6b5a528c | Florent Chuffart | wp_nucs_2 = extract_wp(strain_maps, roi_index, combi[2], tmp_common_nucs) |
993 | ddd6e221 | Florent Chuffart | tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, roi_index, roi=roi, config=config) |
994 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(tr_wp_nucs_2[,1])) { |
995 | 6b5a528c | Florent Chuffart | lines(c(tr_wp_nucs_2[i,]$lower_bound, tr_wp_nucs_2[i,]$upper_bound), c(+3.7,+3.7), col=3) |
996 | 6b5a528c | Florent Chuffart | } |
997 | 6b5a528c | Florent Chuffart | |
998 | 6b5a528c | Florent Chuffart | print("Dealing with wp from both...") |
999 | 6b5a528c | Florent Chuffart | all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2)) |
1000 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(all_wp[,1])) { |
1001 | 6b5a528c | Florent Chuffart | lines(c(all_wp[i,]$lower_bound, all_wp[i,]$upper_bound), c(+3.6, +3.6), col=1) |
1002 | 6b5a528c | Florent Chuffart | } |
1003 | 6b5a528c | Florent Chuffart | |
1004 | 6b5a528c | Florent Chuffart | print("Dealing with fuzzy and wp...") |
1005 | 6b5a528c | Florent Chuffart | non_inter_fuzzy = substract_region(all_fuzzy, all_wp) |
1006 | 6b5a528c | Florent Chuffart | if (is.null(non_inter_fuzzy)) { return(NULL) } |
1007 | 6b5a528c | Florent Chuffart | |
1008 | 6b5a528c | Florent Chuffart | non_inter_fuzzy$len = non_inter_fuzzy$upper_bound - non_inter_fuzzy$lower_bound |
1009 | 6b5a528c | Florent Chuffart | # non_inter_fuzzy = non_inter_fuzzy[non_inter_fuzzy$len >= min_fuzz_width,] |
1010 | 6b5a528c | Florent Chuffart | if (PLOT) for (i in 1:length(non_inter_fuzzy[,1])) { |
1011 | 6b5a528c | Florent Chuffart | lines(c(non_inter_fuzzy[i,]$lower_bound, non_inter_fuzzy[i,]$upper_bound), c(+3.9, +3.9), col=1) |
1012 | 6b5a528c | Florent Chuffart | } |
1013 | 6b5a528c | Florent Chuffart | |
1014 | 6b5a528c | Florent Chuffart | non_inter_fuzzy$index_nuc = 1:length(non_inter_fuzzy[,1]) |
1015 | 6b5a528c | Florent Chuffart | return (non_inter_fuzzy) |
1016 | 6b5a528c | Florent Chuffart | } |
1017 | 6b5a528c | Florent Chuffart | |
1018 | 6b5a528c | Florent Chuffart | |
1019 | 6b5a528c | Florent Chuffart | |
1020 | 6b5a528c | Florent Chuffart | get_all_reads = function(# Retrieve Reads |
1021 | 6b5a528c | Florent Chuffart | ### Retrieve reads for a given marker, combi, form. |
1022 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
1023 | 6b5a528c | Florent Chuffart | combi, ##<< The starin combination to considere. |
1024 | cc54c799 | Florent Chuffart | form="wp", ##<< The nuc form to considere. |
1025 | cc54c799 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
1026 | 6b5a528c | Florent Chuffart | ) { |
1027 | 6b5a528c | Florent Chuffart | all_reads = NULL |
1028 | 6b5a528c | Florent Chuffart | for (manip in c("Mnase_Seq", marker)) { |
1029 | 6b5a528c | Florent Chuffart | if (form == "fuzzy") { |
1030 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="") |
1031 | 6b5a528c | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
1032 | 6b5a528c | Florent Chuffart | tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,] |
1033 | 6b5a528c | Florent Chuffart | tmp_res$form = form |
1034 | 6b5a528c | Florent Chuffart | } else if (form == "wp") { |
1035 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="") |
1036 | 6b5a528c | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
1037 | 6b5a528c | Florent Chuffart | tmp_res$form = form |
1038 | 6b5a528c | Florent Chuffart | } else if (form == "wpfuzzy") { |
1039 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="") |
1040 | 6b5a528c | Florent Chuffart | tmp_res = read.table(file=out_filename, header=TRUE) |
1041 | 6b5a528c | Florent Chuffart | tmp_res$form = "wp" |
1042 | cc54c799 | Florent Chuffart | out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_fuzzy_and_nbreads.tab",sep="") |
1043 | 6b5a528c | Florent Chuffart | tmp_res2 = read.table(file=out_filename, header=TRUE) |
1044 | 6b5a528c | Florent Chuffart | tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,] |
1045 | 6b5a528c | Florent Chuffart | tmp_res2$form = "fuzzy" |
1046 | 6b5a528c | Florent Chuffart | tmp_res = rbind(tmp_res, tmp_res2) |
1047 | 6b5a528c | Florent Chuffart | } |
1048 | 6b5a528c | Florent Chuffart | if (is.null(all_reads)) { |
1049 | 6b5a528c | Florent Chuffart | all_reads = tmp_res[,c(1:9,length(tmp_res))] |
1050 | 6b5a528c | Florent Chuffart | } |
1051 | 6b5a528c | Florent Chuffart | tmp_res = tmp_res[,-c(1:9,length(tmp_res))] |
1052 | 6b5a528c | Florent Chuffart | all_reads = cbind(all_reads, tmp_res) |
1053 | 6b5a528c | Florent Chuffart | } |
1054 | 6b5a528c | Florent Chuffart | return(all_reads) |
1055 | 6b5a528c | Florent Chuffart | } |
1056 | 6b5a528c | Florent Chuffart | |
1057 | 6b5a528c | Florent Chuffart | get_design = function(# Build the design for deseq |
1058 | 6b5a528c | Florent Chuffart | ### This function build the design according sample properties. |
1059 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
1060 | 6b5a528c | Florent Chuffart | combi, ##<< The starin combination to considere. |
1061 | 6b5a528c | Florent Chuffart | all_samples ##<< Global list of samples. |
1062 | 6b5a528c | Florent Chuffart | ) { |
1063 | 6b5a528c | Florent Chuffart | off1 = 0 |
1064 | 6b5a528c | Florent Chuffart | off2 = 0 |
1065 | 6b5a528c | Florent Chuffart | manips = c("Mnase_Seq", marker) |
1066 | 6b5a528c | Florent Chuffart | design_rownames = c() |
1067 | 6b5a528c | Florent Chuffart | design_manip = c() |
1068 | 6b5a528c | Florent Chuffart | design_strain = c() |
1069 | 6b5a528c | Florent Chuffart | off2index = function(off) { |
1070 | 6b5a528c | Florent Chuffart | switch(toString(off), |
1071 | 6b5a528c | Florent Chuffart | "1"=c(0,1,1), |
1072 | 6b5a528c | Florent Chuffart | "2"=c(1,0,1), |
1073 | 6b5a528c | Florent Chuffart | "3"=c(1,1,0), |
1074 | 6b5a528c | Florent Chuffart | c(1,1,1) |
1075 | 6b5a528c | Florent Chuffart | ) |
1076 | 6b5a528c | Florent Chuffart | } |
1077 | 6b5a528c | Florent Chuffart | for (manip in manips) { |
1078 | 6b5a528c | Florent Chuffart | tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) & all_samples$marker == manip), ] |
1079 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[order(tmp_samples$strain), ] |
1080 | 6b5a528c | Florent Chuffart | if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) { |
1081 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,] |
1082 | 6b5a528c | Florent Chuffart | } else { |
1083 | 6b5a528c | Florent Chuffart | if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) { |
1084 | 6b5a528c | Florent Chuffart | tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,] |
1085 | 6b5a528c | Florent Chuffart | } |
1086 | 6b5a528c | Florent Chuffart | } |
1087 | 6b5a528c | Florent Chuffart | design_manip = c(design_manip, rep(manip, length(tmp_samples$id))) |
1088 | 6b5a528c | Florent Chuffart | for (strain in combi) { |
1089 | 6b5a528c | Florent Chuffart | cols = apply(t(tmp_samples[ (tmp_samples$strain == strain & tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")}) |
1090 | 6b5a528c | Florent Chuffart | design_strain = c(design_strain, rep(strain, length(cols))) |
1091 | 6b5a528c | Florent Chuffart | design_rownames = c(design_rownames, cols) |
1092 | 6b5a528c | Florent Chuffart | } |
1093 | 6b5a528c | Florent Chuffart | } |
1094 | 6b5a528c | Florent Chuffart | snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain) |
1095 | 6b5a528c | Florent Chuffart | return(snep_design) |
1096 | 6b5a528c | Florent Chuffart | } |
1097 | 6b5a528c | Florent Chuffart | |
1098 | 6b5a528c | Florent Chuffart | plot_dist_samples = function(# Plot the distribution of reads. |
1099 | 6b5a528c | Florent Chuffart | ### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution. |
1100 | 6b5a528c | Florent Chuffart | strain, ##<< The strain to considere. |
1101 | 6b5a528c | Florent Chuffart | marker, ##<< The marker to considere. |
1102 | 6b5a528c | Florent Chuffart | res, ##<< Data |
1103 | 6b5a528c | Florent Chuffart | all_samples, ##<< Global list of samples. |
1104 | 6b5a528c | Florent Chuffart | NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot. |
1105 | 6b5a528c | Florent Chuffart | ) { |
1106 | 6b5a528c | Florent Chuffart | cols = apply(t(all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")}) |
1107 | 6b5a528c | Florent Chuffart | snepCountTable = res[,cols] |
1108 | 6b5a528c | Florent Chuffart | snepDesign = data.frame( |
1109 | 6b5a528c | Florent Chuffart | row.names = cols, |
1110 | 6b5a528c | Florent Chuffart | manip = rep(marker, length(cols)), |
1111 | 6b5a528c | Florent Chuffart | strain = rep(strain, length(cols)) |
1112 | 6b5a528c | Florent Chuffart | ) |
1113 | 6b5a528c | Florent Chuffart | cdsFull = newCountDataSet(snepCountTable, snepDesign) |
1114 | 6b5a528c | Florent Chuffart | sizeFactors = estimateSizeFactors(cdsFull) |
1115 | 6b5a528c | Florent Chuffart | # print(sizeFactors[[1]]) |
1116 | 6b5a528c | Florent Chuffart | sample_ids = all_samples[ (all_samples$strain == strain & all_samples$marker == marker), ]$id |
1117 | 6b5a528c | Florent Chuffart | if (NEWPLOT) { |
1118 | 6b5a528c | Florent Chuffart | plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker)) |
1119 | 6b5a528c | Florent Chuffart | NEWPLOT = FALSE |
1120 | 6b5a528c | Florent Chuffart | } |
1121 | 6b5a528c | Florent Chuffart | for (it in 1:length(sample_ids)) { |
1122 | 6b5a528c | Florent Chuffart | sample_id = sample_ids[it] |
1123 | 6b5a528c | Florent Chuffart | lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it) |
1124 | 6b5a528c | Florent Chuffart | } |
1125 | 7164d3ac | Florent Chuffart | legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols) |
1126 | 6b5a528c | Florent Chuffart | } |
1127 | 6b5a528c | Florent Chuffart | |
1128 | 6b5a528c | Florent Chuffart | analyse_design = function(# Launch deseq methods. |
1129 | 6b5a528c | Florent Chuffart | ### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models. |
1130 | 6b5a528c | Florent Chuffart | snep_design, ##<< The design to considere. |
1131 | 6b5a528c | Florent Chuffart | reads ##<< The data to considere. |
1132 | 6b5a528c | Florent Chuffart | ) { |
1133 | 6b5a528c | Florent Chuffart | snep_count_table = reads[, rownames(snep_design)] |
1134 | 6b5a528c | Florent Chuffart | cdsFull = newCountDataSet(snep_count_table, snep_design) |
1135 | 6b5a528c | Florent Chuffart | cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum") |
1136 | 6b5a528c | Florent Chuffart | fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain) |
1137 | 6b5a528c | Florent Chuffart | |
1138 | 6b5a528c | Florent Chuffart | cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum") |
1139 | 6b5a528c | Florent Chuffart | fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain) |
1140 | 6b5a528c | Florent Chuffart | |
1141 | 6b5a528c | Florent Chuffart | pvalsGLM = nbinomGLMTest( fit1, fit0 ) |
1142 | 6b5a528c | Florent Chuffart | return(list(fit1, fit0, snep_design, pvalsGLM)) |
1143 | 6b5a528c | Florent Chuffart | } |
1144 | 6b5a528c | Florent Chuffart | |
1145 | 6b5a528c | Florent Chuffart | |
1146 | 6b5a528c | Florent Chuffart | |
1147 | 6b5a528c | Florent Chuffart | |
1148 | 6b5a528c | Florent Chuffart | |
1149 | 6b5a528c | Florent Chuffart | |
1150 | 6b5a528c | Florent Chuffart | |
1151 | 6b5a528c | Florent Chuffart | |
1152 | 6b5a528c | Florent Chuffart | get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form. |
1153 | 6b5a528c | Florent Chuffart | ### This function uses |
1154 | 6b5a528c | Florent Chuffart | marker, ##<< The marker involved. |
1155 | 6b5a528c | Florent Chuffart | combi, ##<< The strain combination involved. |
1156 | 6b5a528c | Florent Chuffart | form, ##<< the nuc form involved. |
1157 | cc54c799 | Florent Chuffart | all_samples, ##<< Global list of samples. |
1158 | cc54c799 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
1159 | 6b5a528c | Florent Chuffart | ) { |
1160 | 6b5a528c | Florent Chuffart | # PRETREAT |
1161 | 6b5a528c | Florent Chuffart | d = get_design(marker, combi, all_samples) |
1162 | cc54c799 | Florent Chuffart | reads = get_all_reads(marker, combi, form, config=config) |
1163 | 6b5a528c | Florent Chuffart | # RUN ANALYSE |
1164 | 6b5a528c | Florent Chuffart | tmp_analyse = analyse_design(d, reads) |
1165 | 6b5a528c | Florent Chuffart | # RESULTS |
1166 | 6b5a528c | Florent Chuffart | fit1 = tmp_analyse[[1]] |
1167 | 6b5a528c | Florent Chuffart | fit0 = tmp_analyse[[2]] |
1168 | 6b5a528c | Florent Chuffart | k = names(fit1) |
1169 | 6b5a528c | Florent Chuffart | reads[[k[2]]] = signif(fit1[[k[2]]], 5) |
1170 | 6b5a528c | Florent Chuffart | reads[[k[3]]] = signif(fit1[[k[3]]], 5) |
1171 | 6b5a528c | Florent Chuffart | reads[[k[4]]] = signif(fit1[[k[4]]], 5) |
1172 | 6b5a528c | Florent Chuffart | reads$pvalsGLM = signif(tmp_analyse[[4]], 5) |
1173 | 6b5a528c | Florent Chuffart | snep_design = tmp_analyse[[3]] |
1174 | 6b5a528c | Florent Chuffart | print(snep_design) |
1175 | 6b5a528c | Florent Chuffart | fdr = 0.0001 |
1176 | 6b5a528c | Florent Chuffart | thres = FDR(reads$pvalsGLM, fdr) |
1177 | 6b5a528c | Florent Chuffart | reads$snep_index = reads$pvalsGLM < thres |
1178 | 6b5a528c | Florent Chuffart | print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = "")) |
1179 | 6b5a528c | Florent Chuffart | return(reads) |
1180 | 6b5a528c | Florent Chuffart | }, ex=function(){ |
1181 | 6b5a528c | Florent Chuffart | marker = "H3K4me1" |
1182 | 6b5a528c | Florent Chuffart | combi = c("BY", "YJM") |
1183 | 6b5a528c | Florent Chuffart | form = "wpfuzzy" # "wp" | "fuzzy" | "wpfuzzy" |
1184 | 6b5a528c | Florent Chuffart | # foo = get_sneps(marker, combi, form) |
1185 | 6b5a528c | Florent Chuffart | # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp") |
1186 | 6b5a528c | Florent Chuffart | }) |
1187 | 6b5a528c | Florent Chuffart | |
1188 | 6b5a528c | Florent Chuffart | |
1189 | 6b5a528c | Florent Chuffart | |
1190 | 6b5a528c | Florent Chuffart | |
1191 | 6b5a528c | Florent Chuffart | |
1192 | 6b5a528c | Florent Chuffart | |
1193 | 6b5a528c | Florent Chuffart | |
1194 | 6b5a528c | Florent Chuffart | |
1195 | 6b5a528c | Florent Chuffart | |
1196 | 6b5a528c | Florent Chuffart | |
1197 | 6b5a528c | Florent Chuffart | |
1198 | 6b5a528c | Florent Chuffart | |
1199 | 6b5a528c | Florent Chuffart | |
1200 | 6b5a528c | Florent Chuffart | |
1201 | 6b5a528c | Florent Chuffart | |
1202 | 6b5a528c | Florent Chuffart | |
1203 | 6b5a528c | Florent Chuffart | |
1204 | 6b5a528c | Florent Chuffart | |
1205 | 6b5a528c | Florent Chuffart | |
1206 | 6b5a528c | Florent Chuffart | |
1207 | 6b5a528c | Florent Chuffart | |
1208 | 6b5a528c | Florent Chuffart | |
1209 | 6b5a528c | Florent Chuffart | |
1210 | 6b5a528c | Florent Chuffart | |
1211 | 1d833b97 | Florent Chuffart | ROM2ARAB = function(# Roman to Arabic pair list. |
1212 | 1d833b97 | Florent Chuffart | ### Util to convert Roman to Arabic |
1213 | 1d833b97 | Florent Chuffart | ){list( |
1214 | 1d833b97 | Florent Chuffart | "I" = 1, |
1215 | 1d833b97 | Florent Chuffart | "II" = 2, |
1216 | 1d833b97 | Florent Chuffart | "III" = 3, |
1217 | 1d833b97 | Florent Chuffart | "IV" = 4, |
1218 | 1d833b97 | Florent Chuffart | "V" = 5, |
1219 | 1d833b97 | Florent Chuffart | "VI" = 6, |
1220 | 1d833b97 | Florent Chuffart | "VII" = 7, |
1221 | 1d833b97 | Florent Chuffart | "VIII" = 8, |
1222 | 1d833b97 | Florent Chuffart | "IX" = 9, |
1223 | 1d833b97 | Florent Chuffart | "X" = 10, |
1224 | 1d833b97 | Florent Chuffart | "XI" = 11, |
1225 | 1d833b97 | Florent Chuffart | "XII" = 12, |
1226 | 1d833b97 | Florent Chuffart | "XIII" = 13, |
1227 | 1d833b97 | Florent Chuffart | "XIV" = 14, |
1228 | 1d833b97 | Florent Chuffart | "XV" = 15, |
1229 | 1d833b97 | Florent Chuffart | "XVI" = 16, |
1230 | 1d833b97 | Florent Chuffart | "XVII" = 17, |
1231 | 1d833b97 | Florent Chuffart | "XVIII" = 18, |
1232 | 1d833b97 | Florent Chuffart | "XIX" = 19, |
1233 | 1d833b97 | Florent Chuffart | "XX" = 20 |
1234 | 1d833b97 | Florent Chuffart | )} |
1235 | 1d833b97 | Florent Chuffart | |
1236 | 1d833b97 | Florent Chuffart | switch_pairlist = structure(function(# Switch a pairlist |
1237 | 1d833b97 | Florent Chuffart | ### Take a pairlist key:value and return the switched pairlist value:key. |
1238 | 1d833b97 | Florent Chuffart | l ##<< The pairlist to switch. |
1239 | 1d833b97 | Florent Chuffart | ) { |
1240 | 1d833b97 | Florent Chuffart | ret = list() |
1241 | 1d833b97 | Florent Chuffart | for (name in names(l)) { |
1242 | 1d833b97 | Florent Chuffart | ret[[as.character(l[[name]])]] = name |
1243 | 1d833b97 | Florent Chuffart | } |
1244 | 1d833b97 | Florent Chuffart | ret |
1245 | 1d833b97 | Florent Chuffart | ### The switched pairlist. |
1246 | 1d833b97 | Florent Chuffart | }, ex=function(){ |
1247 | 1d833b97 | Florent Chuffart | l = list(key1 = "value1", key2 = "value2") |
1248 | 1d833b97 | Florent Chuffart | print(switch_pairlist(l)) |
1249 | 1d833b97 | Florent Chuffart | }) |
1250 | 1d833b97 | Florent Chuffart | |
1251 | 1d833b97 | Florent Chuffart | ARAB2ROM = function(# Arabic to Roman pair list. |
1252 | 1d833b97 | Florent Chuffart | ### Util to convert Arabicto Roman |
1253 | 1d833b97 | Florent Chuffart | ){switch_pairlist(ROM2ARAB())} |
1254 | 1d833b97 | Florent Chuffart | |
1255 | 1d833b97 | Florent Chuffart | |
1256 | 1d833b97 | Florent Chuffart | |
1257 | 1d833b97 | Florent Chuffart | |
1258 | 1d833b97 | Florent Chuffart | translate_roi = structure(function(# Translate coords of a genome region. |
1259 | 1d833b97 | Florent Chuffart | ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example. |
1260 | 1d833b97 | Florent Chuffart | roi, ##<< Original genome region of interest. |
1261 | 1d833b97 | Florent Chuffart | strain2, ##<< The strain in wich you want the genome region of interest. |
1262 | 1d833b97 | Florent Chuffart | config=NULL, ##<< GLOBAL config variable |
1263 | 1d833b97 | Florent Chuffart | big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed. |
1264 | 1d833b97 | Florent Chuffart | ) { |
1265 | 1d833b97 | Florent Chuffart | strain1 = roi$strain_ref |
1266 | 1d833b97 | Florent Chuffart | reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
1267 | 1d833b97 | Florent Chuffart | if (strain1 == strain2) { |
1268 | 1d833b97 | Florent Chuffart | roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
1269 | 1d833b97 | Florent Chuffart | return(roi) |
1270 | 1d833b97 | Florent Chuffart | } |
1271 | 1d833b97 | Florent Chuffart | # Launch c2c file |
1272 | 1d833b97 | Florent Chuffart | if (reverse) { |
1273 | 1d833b97 | Florent Chuffart | c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]) |
1274 | 1d833b97 | Florent Chuffart | } else { |
1275 | 1d833b97 | Florent Chuffart | c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]) |
1276 | 1d833b97 | Florent Chuffart | } |
1277 | 1d833b97 | Florent Chuffart | c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE) |
1278 | 1d833b97 | Florent Chuffart | # filtering it |
1279 | 1d833b97 | Florent Chuffart | c2c = c2c[c2c$V6=="-",] |
1280 | 1d833b97 | Florent Chuffart | # Reverse |
1281 | 1d833b97 | Florent Chuffart | if (reverse) { |
1282 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V1 |
1283 | 1d833b97 | Florent Chuffart | c2c$V1 = c2c$V7 |
1284 | 1d833b97 | Florent Chuffart | c2c$V7 = tmp_col |
1285 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V2 |
1286 | 1d833b97 | Florent Chuffart | c2c$V2 = c2c$V9 |
1287 | 1d833b97 | Florent Chuffart | c2c$V9 = tmp_col |
1288 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V3 |
1289 | 1d833b97 | Florent Chuffart | c2c$V3 = c2c$V10 |
1290 | 1d833b97 | Florent Chuffart | c2c$V10 = tmp_col |
1291 | 1d833b97 | Florent Chuffart | } |
1292 | 1d833b97 | Florent Chuffart | # Restrict c2c to big_roi |
1293 | 1d833b97 | Florent Chuffart | # if (FALSE) { |
1294 | 1d833b97 | Florent Chuffart | if (!is.null(big_roi)) { |
1295 | 62d69d53 | Florent Chuffart | if (roi$strain_ref != big_roi$strain_ref) { |
1296 | 1e34bb1f | Florent Chuffart | # print("WARNING, big_roi and roi not in the same strain_ref... translating big_roi.") |
1297 | 62d69d53 | Florent Chuffart | big_roi = translate_roi(big_roi, roi$strain_ref, config=config) |
1298 | 62d69d53 | Florent Chuffart | } |
1299 | 62d69d53 | Florent Chuffart | if (big_roi$end < big_roi$begin) { |
1300 | 62d69d53 | Florent Chuffart | tmp_var = big_roi$begin |
1301 | 62d69d53 | Florent Chuffart | big_roi$begin = big_roi$end |
1302 | 62d69d53 | Florent Chuffart | big_roi$end = tmp_var |
1303 | 62d69d53 | Florent Chuffart | big_roi$length = big_roi$length |
1304 | 62d69d53 | Florent Chuffart | } |
1305 | ded736c8 | Florent Chuffart | if (big_roi$chr!=roi$chr | roi$end > big_roi$end | roi$end < big_roi$begin | roi$begin > big_roi$end | roi$begin < big_roi$begin) { |
1306 | ded736c8 | Florent Chuffart | print("WARNING! Trying to translate a roi not included in a big_roi.") |
1307 | ded736c8 | Florent Chuffart | return(NULL) |
1308 | ded736c8 | Florent Chuffart | } |
1309 | 62d69d53 | Florent Chuffart | if (strain1 == "BY") { |
1310 | 62d69d53 | Florent Chuffart | big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="") |
1311 | 62d69d53 | Florent Chuffart | } else if (strain1 == "RM") { |
1312 | 62d69d53 | Florent Chuffart | big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="") |
1313 | 62d69d53 | Florent Chuffart | } else if (strain1 == "YJM") { |
1314 | 62d69d53 | Florent Chuffart | big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]] |
1315 | 1d833b97 | Florent Chuffart | } |
1316 | 62d69d53 | Florent Chuffart | big_begin_1 = big_roi$begin |
1317 | 62d69d53 | Florent Chuffart | big_end_1 = big_roi$end |
1318 | 62d69d53 | Florent Chuffart | c2c = c2c[c2c$V1==big_chro_1,] |
1319 | 62d69d53 | Florent Chuffart | if (length(c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < big_begin_1 & c2c$V2 < c2c$V3,c("V2", "V3") ] = big_begin_1} |
1320 | 62d69d53 | Florent Chuffart | if (length(c2c[c2c$V2 > big_end_1 & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > big_end_1 & c2c$V2 < c2c$V3, c("V2", "V3")] = big_end_1} |
1321 | 62d69d53 | Florent Chuffart | if (length(c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < big_begin_1 & c2c$V3 < c2c$V2,c("V2", "V3") ] = big_begin_1} |
1322 | 62d69d53 | Florent Chuffart | if (length(c2c[c2c$V3 > big_end_1 & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > big_end_1 & c2c$V3 < c2c$V2, c("V2", "V3")] = big_end_1} |
1323 | 62d69d53 | Florent Chuffart | c2c = c2c[c2c$V2 - c2c$V3 != 0,] |
1324 | 1d833b97 | Florent Chuffart | } |
1325 | 1d833b97 | Florent Chuffart | # Convert initial roi$chr into c2c format |
1326 | 1d833b97 | Florent Chuffart | if (strain1 == "BY") { |
1327 | 1d833b97 | Florent Chuffart | chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="") |
1328 | 1d833b97 | Florent Chuffart | } else if (strain1 == "RM") { |
1329 | 1d833b97 | Florent Chuffart | chro_1 = paste("supercontig_1.",roi$chr,sep="") |
1330 | 1d833b97 | Florent Chuffart | } else if (strain1 == "YJM") { |
1331 | 1d833b97 | Florent Chuffart | chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]] |
1332 | 1d833b97 | Florent Chuffart | } |
1333 | 1d833b97 | Florent Chuffart | begin_1 = roi$begin |
1334 | 1d833b97 | Florent Chuffart | end_1 = roi$end |
1335 | 1d833b97 | Florent Chuffart | # Computing equivalent strain_2 alignment coordinates |
1336 | 1d833b97 | Florent Chuffart | if (reverse) { |
1337 | 1d833b97 | Florent Chuffart | tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),] |
1338 | 1d833b97 | Florent Chuffart | tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),] |
1339 | 1d833b97 | Florent Chuffart | } else { |
1340 | 1d833b97 | Florent Chuffart | tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
1341 | 1d833b97 | Florent Chuffart | tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,] |
1342 | 1d833b97 | Florent Chuffart | } |
1343 | 1d833b97 | Florent Chuffart | # Never happend conditions ... |
1344 | 1d833b97 | Florent Chuffart | { |
1345 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) == 0) { |
1346 | 1d833b97 | Florent Chuffart | # begin_1 is between to lines: shift begin_1 to the start of 2nd line. |
1347 | 8c87ee64 | Florent Chuffart | if (sum(c2c$V3 >= c2c$V2) != 0) { |
1348 | 54cc8283 | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,] |
1349 | f56c14b2 | Florent Chuffart | begin_1 = min(tmp_c2c$V2) |
1350 | 54cc8283 | Florent Chuffart | } else { |
1351 | 5a26597a | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1,] |
1352 | f56c14b2 | Florent Chuffart | begin_1 = min(tmp_c2c$V3) |
1353 | 54cc8283 | Florent Chuffart | } |
1354 | 1d833b97 | Florent Chuffart | if (reverse) { |
1355 | 1d833b97 | Florent Chuffart | tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),] |
1356 | 1d833b97 | Florent Chuffart | } else { |
1357 | 1d833b97 | Florent Chuffart | tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
1358 | 1d833b97 | Florent Chuffart | } |
1359 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) == 0) { |
1360 | 1d833b97 | Florent Chuffart | if (!is.null(big_roi)) { |
1361 | 1d833b97 | Florent Chuffart | return(NULL) |
1362 | 1d833b97 | Florent Chuffart | tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,] |
1363 | 1d833b97 | Florent Chuffart | } else { |
1364 | 1d833b97 | Florent Chuffart | # return(NULL) |
1365 | 1d833b97 | Florent Chuffart | # print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,]) |
1366 | 1d833b97 | Florent Chuffart | # print(c2c[c2c$V1==chro_1,]) |
1367 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1368 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1369 | 1d833b97 | Florent Chuffart | stop("Never happend condition 1.") |
1370 | 1d833b97 | Florent Chuffart | } |
1371 | 1d833b97 | Florent Chuffart | } |
1372 | 1d833b97 | Florent Chuffart | } |
1373 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) == 0) { |
1374 | 1d833b97 | Florent Chuffart | # end_1 is between to lines: shift end_1 to the end of 2nd line. |
1375 | 8c87ee64 | Florent Chuffart | if (sum(c2c$V3 >= c2c$V2) != 0) { |
1376 | 54cc8283 | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,] |
1377 | f56c14b2 | Florent Chuffart | end_1 = max(tmp_c2c$V3) |
1378 | 54cc8283 | Florent Chuffart | } else { |
1379 | 5a26597a | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2<=end_1,] |
1380 | f56c14b2 | Florent Chuffart | end_1 = max(tmp_c2c$V2) |
1381 | 54cc8283 | Florent Chuffart | } |
1382 | 1d833b97 | Florent Chuffart | if (reverse) { |
1383 | 1d833b97 | Florent Chuffart | tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),] |
1384 | 1d833b97 | Florent Chuffart | } else { |
1385 | 1d833b97 | Florent Chuffart | tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,] |
1386 | 1d833b97 | Florent Chuffart | } |
1387 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) == 0) { |
1388 | 1d833b97 | Florent Chuffart | if (!is.null(big_roi)) { |
1389 | 1d833b97 | Florent Chuffart | return(NULL) |
1390 | 1d833b97 | Florent Chuffart | tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,] |
1391 | 1d833b97 | Florent Chuffart | } else { |
1392 | 1d833b97 | Florent Chuffart | # return(NULL) |
1393 | 1d833b97 | Florent Chuffart | print(c2c[c2c$V1==chro_1,]) |
1394 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1395 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1396 | 1d833b97 | Florent Chuffart | stop("Never happend condition 2.") |
1397 | 1d833b97 | Florent Chuffart | } |
1398 | 1d833b97 | Florent Chuffart | } |
1399 | 1d833b97 | Florent Chuffart | } |
1400 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) != 1) { |
1401 | 1d833b97 | Florent Chuffart | # tmptransfostart = tmptransfostart[1,] |
1402 | 1d833b97 | Florent Chuffart | # print("many start") |
1403 | 1d833b97 | Florent Chuffart | # print(c2c[c2c$V1==chro_1,]) |
1404 | 1d833b97 | Florent Chuffart | tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,] |
1405 | 1d833b97 | Florent Chuffart | if (length(tmptransfostart$V8) != 1) { |
1406 | 1d833b97 | Florent Chuffart | # return(NULL) |
1407 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1408 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1409 | 1d833b97 | Florent Chuffart | stop("Never happend condition 3.") |
1410 | 1d833b97 | Florent Chuffart | } |
1411 | 1d833b97 | Florent Chuffart | } |
1412 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) != 1) { |
1413 | 1d833b97 | Florent Chuffart | # tmptransfostop = tmptransfostop[length(tmptransfostop$V8),] |
1414 | 1d833b97 | Florent Chuffart | # print("many stop") |
1415 | 1d833b97 | Florent Chuffart | # print(tmptransfostop) |
1416 | 1d833b97 | Florent Chuffart | # print(roi) |
1417 | 1d833b97 | Florent Chuffart | tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,] |
1418 | 1d833b97 | Florent Chuffart | if (length(tmptransfostop$V8) != 1) { |
1419 | 1d833b97 | Florent Chuffart | # return(NULL) |
1420 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1421 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1422 | 1d833b97 | Florent Chuffart | stop("Never happend condition 4.") |
1423 | 1d833b97 | Florent Chuffart | } |
1424 | 1d833b97 | Florent Chuffart | } |
1425 | 1d833b97 | Florent Chuffart | if (tmptransfostart$V7 != tmptransfostop$V7) { |
1426 | 1d833b97 | Florent Chuffart | print(tmptransfostart) |
1427 | 1d833b97 | Florent Chuffart | print(tmptransfostop) |
1428 | 1d833b97 | Florent Chuffart | stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.") |
1429 | 1d833b97 | Florent Chuffart | } |
1430 | 1d833b97 | Florent Chuffart | } |
1431 | 1d833b97 | Florent Chuffart | # Deal with strand |
1432 | 1d833b97 | Florent Chuffart | if (tmptransfostart$V8 == 1) { |
1433 | 1d833b97 | Florent Chuffart | begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2) |
1434 | 1d833b97 | Florent Chuffart | end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2) |
1435 | 1d833b97 | Florent Chuffart | } else { |
1436 | 1d833b97 | Florent Chuffart | begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2) |
1437 | 1d833b97 | Florent Chuffart | end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2) |
1438 | 1d833b97 | Florent Chuffart | } |
1439 | 1d833b97 | Florent Chuffart | # Build returned roi |
1440 | 1d833b97 | Florent Chuffart | roi$strain_ref = strain2 |
1441 | 1d833b97 | Florent Chuffart | if (roi$strain_ref == "BY") { |
1442 | 1d833b97 | Florent Chuffart | roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]] |
1443 | 1d833b97 | Florent Chuffart | } else { |
1444 | 1d833b97 | Florent Chuffart | roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]] |
1445 | 1d833b97 | Florent Chuffart | } |
1446 | 1d833b97 | Florent Chuffart | roi$begin = begin_2 |
1447 | 1d833b97 | Florent Chuffart | roi$end = end_2 |
1448 | 1d833b97 | Florent Chuffart | if (sign(roi$end - roi$begin) == 0) { |
1449 | 1d833b97 | Florent Chuffart | roi$length = 1 |
1450 | 1d833b97 | Florent Chuffart | } else { |
1451 | 1d833b97 | Florent Chuffart | roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
1452 | 1d833b97 | Florent Chuffart | } |
1453 | 1d833b97 | Florent Chuffart | return(roi) |
1454 | 1d833b97 | Florent Chuffart | }, ex=function(){ |
1455 | 1d833b97 | Florent Chuffart | # Define new translate_roi function... |
1456 | 1d833b97 | Florent Chuffart | translate_roi = function(roi, strain2, config) { |
1457 | 1d833b97 | Florent Chuffart | strain1 = roi$strain_ref |
1458 | 1d833b97 | Florent Chuffart | if (strain1 == strain2) { |
1459 | 1d833b97 | Florent Chuffart | return(roi) |
1460 | 1d833b97 | Florent Chuffart | } else { |
1461 | 1d833b97 | Florent Chuffart | stop("Here is my new translate_roi function...") |
1462 | 1d833b97 | Florent Chuffart | } |
1463 | 1d833b97 | Florent Chuffart | } |
1464 | 1d833b97 | Florent Chuffart | # Binding it by uncomment follwing lines. |
1465 | 1d833b97 | Florent Chuffart | # unlockBinding("translate_roi", as.environment("package:nm")) |
1466 | 1d833b97 | Florent Chuffart | # unlockBinding("translate_roi", getNamespace("nm")) |
1467 | 1d833b97 | Florent Chuffart | # assign("translate_roi", translate_roi, "package:nm") |
1468 | 1d833b97 | Florent Chuffart | # assign("translate_roi", translate_roi, getNamespace("nm")) |
1469 | 1d833b97 | Florent Chuffart | # lockBinding("translate_roi", getNamespace("nm")) |
1470 | 1d833b97 | Florent Chuffart | # lockBinding("translate_roi", as.environment("package:nm")) |
1471 | 1d833b97 | Florent Chuffart | }) |
1472 | 1d833b97 | Florent Chuffart | |
1473 | 1d833b97 | Florent Chuffart | |
1474 | 1d833b97 | Florent Chuffart | |
1475 | 1d833b97 | Florent Chuffart | |
1476 | 1d833b97 | Florent Chuffart | |
1477 | 1d833b97 | Florent Chuffart | |
1478 | 1d833b97 | Florent Chuffart | |
1479 | 1d833b97 | Florent Chuffart | compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR) |
1480 | 1d833b97 | Florent Chuffart | ### CURs are regions that can be aligned between the genomes |
1481 | 1d833b97 | Florent Chuffart | diff_allowed = 10, ##<< the maximum indel width allowe din a CUR |
1482 | 1d833b97 | Florent Chuffart | min_cur_width = 200, ##<< The minimum width of a CUR |
1483 | 8e9facd8 | Florent Chuffart | config = NULL, ##<< GLOBAL config variable |
1484 | 1d833b97 | Florent Chuffart | plot = FALSE ##<< Plot CURs or not |
1485 | 1d833b97 | Florent Chuffart | ) { |
1486 | 1d833b97 | Florent Chuffart | get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) { |
1487 | 8e9facd8 | Florent Chuffart | c2c = get_content(config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]], "table", stringsAsFactors=FALSE) |
1488 | 1d833b97 | Florent Chuffart | # Filtering unagapped |
1489 | 1d833b97 | Florent Chuffart | c2c = c2c[c2c$V6=="-",] |
1490 | 1d833b97 | Florent Chuffart | # filtering some things (chr...) |
1491 | 1d833b97 | Florent Chuffart | # c2c = c2c[c2c$V1 == "chrIV",] |
1492 | 1d833b97 | Florent Chuffart | diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] |
1493 | 1d833b97 | Florent Chuffart | diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] |
1494 | 1d833b97 | Florent Chuffart | # Plot diffs to define a threshold (diff_allowed) |
1495 | 1d833b97 | Florent Chuffart | # hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100)) |
1496 | 1d833b97 | Florent Chuffart | # Filtering |
1497 | 1d833b97 | Florent Chuffart | indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed) |
1498 | 1d833b97 | Florent Chuffart | indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1)) |
1499 | 1d833b97 | Florent Chuffart | |
1500 | 1d833b97 | Florent Chuffart | rois = NULL |
1501 | 1d833b97 | Florent Chuffart | for(i in 1:length(indexes_start)) { |
1502 | 1d833b97 | Florent Chuffart | start = indexes_start[i] |
1503 | 1d833b97 | Florent Chuffart | stop = indexes_stop[i] |
1504 | 1d833b97 | Florent Chuffart | sub_c2c = c2c[start:stop,] |
1505 | 1d833b97 | Florent Chuffart | if (strain1 == "BY") { |
1506 | 1d833b97 | Florent Chuffart | chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]] |
1507 | 1d833b97 | Florent Chuffart | } else { |
1508 | 1d833b97 | Florent Chuffart | chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]] |
1509 | 1d833b97 | Florent Chuffart | } |
1510 | 1d833b97 | Florent Chuffart | roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1) |
1511 | 1d833b97 | Florent Chuffart | roi[["length"]] = roi$end - roi$begin |
1512 | 1d833b97 | Florent Chuffart | if (roi$length >= min_cur_width) { |
1513 | 1d833b97 | Florent Chuffart | rois = dfadd(rois,roi) |
1514 | 1d833b97 | Florent Chuffart | } |
1515 | 1d833b97 | Florent Chuffart | if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) { |
1516 | 1d833b97 | Florent Chuffart | print("*************** ERROR, non homogenous region! ********************") |
1517 | 1d833b97 | Florent Chuffart | } |
1518 | 1d833b97 | Florent Chuffart | # print(i) |
1519 | 1d833b97 | Florent Chuffart | # print(roi) |
1520 | 1d833b97 | Florent Chuffart | # print(sub_c2c) |
1521 | 1d833b97 | Florent Chuffart | # print("________________________________________________________________") |
1522 | 1d833b97 | Florent Chuffart | } |
1523 | 1d833b97 | Florent Chuffart | if (plot) { |
1524 | 1d833b97 | Florent Chuffart | print(paste(length(indexes_stop), "area of interest.")) |
1525 | 1d833b97 | Florent Chuffart | # Plot rois |
1526 | 8e9facd8 | Florent Chuffart | genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[strain1]], "fasta") |
1527 | 1d833b97 | Florent Chuffart | plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))})))) |
1528 | 1d833b97 | Florent Chuffart | for (name in names(genome)) { |
1529 | 1d833b97 | Florent Chuffart | if (strain1 == "BY") { |
1530 | 1d833b97 | Florent Chuffart | chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="") |
1531 | 1d833b97 | Florent Chuffart | } else { |
1532 | 1d833b97 | Florent Chuffart | chr_ref = name |
1533 | 1d833b97 | Florent Chuffart | } |
1534 | 1d833b97 | Florent Chuffart | y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]]) |
1535 | 1d833b97 | Florent Chuffart | lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev)) |
1536 | 1d833b97 | Florent Chuffart | text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref) |
1537 | 1d833b97 | Florent Chuffart | } |
1538 | 1d833b97 | Florent Chuffart | col=1 |
1539 | 1d833b97 | Florent Chuffart | for (roi_index in 1:length(rois$chr)) { |
1540 | 1d833b97 | Florent Chuffart | roi = rois[roi_index,] |
1541 | 1d833b97 | Florent Chuffart | y_lev = as.integer(roi$chr) + 0.3 |
1542 | 1d833b97 | Florent Chuffart | lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col) |
1543 | 1d833b97 | Florent Chuffart | text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index) |
1544 | 1d833b97 | Florent Chuffart | col = col + 1 |
1545 | 1d833b97 | Florent Chuffart | } |
1546 | 1d833b97 | Florent Chuffart | } |
1547 | 1d833b97 | Florent Chuffart | return(rois) |
1548 | 1d833b97 | Florent Chuffart | } |
1549 | 1d833b97 | Florent Chuffart | rois = NULL |
1550 | 1d833b97 | Florent Chuffart | rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
1551 | 1d833b97 | Florent Chuffart | rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
1552 | 1d833b97 | Florent Chuffart | for (roi_1_index in 1:length(rois_BY_RM[,1])) { |
1553 | 1d833b97 | Florent Chuffart | roi_1 = rois_BY_RM[roi_1_index,] |
1554 | 1d833b97 | Florent Chuffart | roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ; |
1555 | 1d833b97 | Florent Chuffart | # print(length(roi_2_candidates[,1])) |
1556 | 1d833b97 | Florent Chuffart | if (length(roi_2_candidates[,1]) > 0) { |
1557 | 1d833b97 | Florent Chuffart | for(roi_2_index in 1:length(roi_2_candidates[,1])) { |
1558 | 1d833b97 | Florent Chuffart | roi_2 = roi_2_candidates[roi_2_index,] |
1559 | 1d833b97 | Florent Chuffart | roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY") |
1560 | 1d833b97 | Florent Chuffart | roi[["length"]] = roi$end - roi$begin + 1 |
1561 | 1d833b97 | Florent Chuffart | if (roi$length >= min_cur_width) { |
1562 | 1d833b97 | Florent Chuffart | # if (length(rois[,1]) == 153) { |
1563 | 1d833b97 | Florent Chuffart | # print(paste(length(rois[,1]), roi_1_index, roi_2_index )) |
1564 | 1d833b97 | Florent Chuffart | # print(roi_1) |
1565 | 1d833b97 | Florent Chuffart | # print(roi_2) |
1566 | 1d833b97 | Florent Chuffart | # print(roi) |
1567 | 1d833b97 | Florent Chuffart | # } |
1568 | 1d833b97 | Florent Chuffart | rois = dfadd(rois,roi) |
1569 | 1d833b97 | Florent Chuffart | } |
1570 | 1d833b97 | Florent Chuffart | } |
1571 | 1d833b97 | Florent Chuffart | } |
1572 | 1d833b97 | Florent Chuffart | } |
1573 | 1d833b97 | Florent Chuffart | print(length(rois[,1])) |
1574 | 1d833b97 | Florent Chuffart | print(sum(rois$length)) |
1575 | 1d833b97 | Florent Chuffart | rois_1st_round = rois |
1576 | 1d833b97 | Florent Chuffart | rois_2nd_round = NULL |
1577 | 1d833b97 | Florent Chuffart | rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
1578 | 1d833b97 | Florent Chuffart | for (roi_1_index in 1:length(rois_1st_round[,1])) { |
1579 | 1d833b97 | Florent Chuffart | roi_1 = rois_1st_round[roi_1_index,] |
1580 | 8e9facd8 | Florent Chuffart | translated_roi_1 = translate_roi(roi_1, "RM", config = config) |
1581 | 1d833b97 | Florent Chuffart | t_b = min(translated_roi_1$begin, translated_roi_1$end) |
1582 | 1d833b97 | Florent Chuffart | t_e = max(translated_roi_1$begin, translated_roi_1$end) |
1583 | 1d833b97 | Florent Chuffart | roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ; |
1584 | 1d833b97 | Florent Chuffart | if (length(roi_2_candidates[,1]) > 0) { |
1585 | 1d833b97 | Florent Chuffart | for(roi_2_index in 1:length(roi_2_candidates[,1])) { |
1586 | 1d833b97 | Florent Chuffart | roi_2 = roi_2_candidates[roi_2_index,] |
1587 | 1d833b97 | Florent Chuffart | roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM") |
1588 | 1d833b97 | Florent Chuffart | roi[["length"]] = roi$end - roi$begin + 1 |
1589 | 1d833b97 | Florent Chuffart | if (roi$length >= min_cur_width) { |
1590 | 1d833b97 | Florent Chuffart | rois_2nd_round = dfadd(rois_2nd_round,roi) |
1591 | 1d833b97 | Florent Chuffart | } |
1592 | 1d833b97 | Florent Chuffart | } |
1593 | 1d833b97 | Florent Chuffart | } |
1594 | 1d833b97 | Florent Chuffart | } |
1595 | 1d833b97 | Florent Chuffart | print(length(rois_2nd_round[,1])) |
1596 | 1d833b97 | Florent Chuffart | print(sum(rois_2nd_round$length)) |
1597 | 1d833b97 | Florent Chuffart | rois = rois_2nd_round |
1598 | 1d833b97 | Florent Chuffart | |
1599 | 1d833b97 | Florent Chuffart | rois_translator_round = list() |
1600 | 1d833b97 | Florent Chuffart | for (roi_index in 1:length(rois[,1])) { |
1601 | 1d833b97 | Florent Chuffart | roi = rois[roi_index,] |
1602 | 8e9facd8 | Florent Chuffart | BY_roi = translate_roi(roi, "BY", config = config) |
1603 | 1d833b97 | Florent Chuffart | tmp_BY_roi = BY_roi |
1604 | 1d833b97 | Florent Chuffart | BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
1605 | 1d833b97 | Florent Chuffart | BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
1606 | 1d833b97 | Florent Chuffart | BY_roi$length = abs(BY_roi$length) |
1607 | 1d833b97 | Florent Chuffart | rois_translator_round = dfadd(rois_translator_round, BY_roi) |
1608 | 1d833b97 | Florent Chuffart | } |
1609 | 1d833b97 | Florent Chuffart | rois = rois_translator_round |
1610 | 1d833b97 | Florent Chuffart | |
1611 | 1d833b97 | Florent Chuffart | rois_3rd_round = NULL |
1612 | 1d833b97 | Florent Chuffart | for (roi_index in 1:length(rois[,1])) { |
1613 | 1d833b97 | Florent Chuffart | # for (roi_index in 1:2) { |
1614 | 1d833b97 | Florent Chuffart | current_roi = rois[roi_index,] |
1615 | 1d833b97 | Florent Chuffart | # print(roi_index) |
1616 | 1d833b97 | Florent Chuffart | |
1617 | 1d833b97 | Florent Chuffart | to_be_check_rois = dfadd(NULL, current_roi) |
1618 | 1d833b97 | Florent Chuffart | NEED_RERUN = TRUE |
1619 | 1d833b97 | Florent Chuffart | while (NEED_RERUN) { |
1620 | 1d833b97 | Florent Chuffart | # print("RERUN"), |
1621 | 1d833b97 | Florent Chuffart | NEED_RERUN = FALSE |
1622 | 1d833b97 | Florent Chuffart | to_be_check_again = NULL |
1623 | 1d833b97 | Florent Chuffart | for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) { |
1624 | 1d833b97 | Florent Chuffart | # print(to_be_check_rois) |
1625 | 1d833b97 | Florent Chuffart | to_be_check_roi = to_be_check_rois[to_be_check_roi_index,] |
1626 | 1d833b97 | Florent Chuffart | combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM")) |
1627 | 1d833b97 | Florent Chuffart | for (combi in combis) { |
1628 | 1d833b97 | Florent Chuffart | # print(combi) |
1629 | 1d833b97 | Florent Chuffart | strain1 = combi[1] |
1630 | 1d833b97 | Florent Chuffart | strain2 = combi[2] |
1631 | 8e9facd8 | Florent Chuffart | trans_roi = translate_roi(to_be_check_roi, strain1, config = config) |
1632 | 1d833b97 | Florent Chuffart | lower_bound=min(trans_roi$begin, trans_roi$end) |
1633 | 1d833b97 | Florent Chuffart | upper_bound=max(trans_roi$begin, trans_roi$end) |
1634 | 1d833b97 | Florent Chuffart | |
1635 | 1d833b97 | Florent Chuffart | check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) { |
1636 | 1d833b97 | Florent Chuffart | reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
1637 | 1d833b97 | Florent Chuffart | if (strain1 == strain2) { |
1638 | 1d833b97 | Florent Chuffart | roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
1639 | 1d833b97 | Florent Chuffart | return(roi) |
1640 | 1d833b97 | Florent Chuffart | } |
1641 | 1d833b97 | Florent Chuffart | # Launch c2c file |
1642 | 1d833b97 | Florent Chuffart | if (reverse) { |
1643 | 1d833b97 | Florent Chuffart | c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]) |
1644 | 1d833b97 | Florent Chuffart | } else { |
1645 | 1d833b97 | Florent Chuffart | c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]) |
1646 | 1d833b97 | Florent Chuffart | } |
1647 | 1d833b97 | Florent Chuffart | c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE) |
1648 | 1d833b97 | Florent Chuffart | # filtering it |
1649 | 1d833b97 | Florent Chuffart | c2c = c2c[c2c$V6=="-",] |
1650 | 1d833b97 | Florent Chuffart | # Reverse |
1651 | 1d833b97 | Florent Chuffart | if (reverse) { |
1652 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V1 |
1653 | 1d833b97 | Florent Chuffart | c2c$V1 = c2c$V7 |
1654 | 1d833b97 | Florent Chuffart | c2c$V7 = tmp_col |
1655 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V2 |
1656 | 1d833b97 | Florent Chuffart | c2c$V2 = c2c$V9 |
1657 | 1d833b97 | Florent Chuffart | c2c$V9 = tmp_col |
1658 | 1d833b97 | Florent Chuffart | tmp_col = c2c$V3 |
1659 | 1d833b97 | Florent Chuffart | c2c$V3 = c2c$V10 |
1660 | 1d833b97 | Florent Chuffart | c2c$V10 = tmp_col |
1661 | 1d833b97 | Florent Chuffart | } |
1662 | 1d833b97 | Florent Chuffart | |
1663 | 1d833b97 | Florent Chuffart | if (strain1 == "BY") { |
1664 | 1d833b97 | Florent Chuffart | chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="") |
1665 | 1d833b97 | Florent Chuffart | } else if (strain1 == "RM") { |
1666 | 1d833b97 | Florent Chuffart | chro_1 = paste("supercontig_1.",chr,sep="") |
1667 | 1d833b97 | Florent Chuffart | } else if (strain1 == "YJM") { |
1668 | 1d833b97 | Florent Chuffart | chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]] |
1669 | 1d833b97 | Florent Chuffart | } |
1670 | 1d833b97 | Florent Chuffart | # print(chro_1) |
1671 | 1d833b97 | Florent Chuffart | if (!is.null(lower_bound) & !is.null(upper_bound)) { |
1672 | 1d833b97 | Florent Chuffart | if (reverse) { |
1673 | 1d833b97 | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),] |
1674 | 1d833b97 | Florent Chuffart | } else { |
1675 | 1d833b97 | Florent Chuffart | tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,] |
1676 | 1d833b97 | Florent Chuffart | } |
1677 | 1d833b97 | Florent Chuffart | } else { |
1678 | 1d833b97 | Florent Chuffart | tmp_c2c = c2c[c2c$V1 == chr,] |
1679 | 1d833b97 | Florent Chuffart | } |
1680 | 1d833b97 | Florent Chuffart | if (length(tmp_c2c[,1]) > 1) { |
1681 | 1d833b97 | Florent Chuffart | pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){ |
1682 | 1d833b97 | Florent Chuffart | # print(paste(i, "/", length(tmp_c2c[,1]))) |
1683 | 1d833b97 | Florent Chuffart | apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){ |
1684 | 1d833b97 | Florent Chuffart | l1 = tmp_c2c[i,] |
1685 | 1d833b97 | Florent Chuffart | b1 = min(l1$V2, l1$V3) |
1686 | 1d833b97 | Florent Chuffart | e1 = max(l1$V2, l1$V3) |
1687 | 1d833b97 | Florent Chuffart | l2 = tmp_c2c[j,] |
1688 | 1d833b97 | Florent Chuffart | b2 = min(l2$V2, l2$V3) |
1689 | 1d833b97 | Florent Chuffart | e2 = max(l2$V2, l2$V3) |
1690 | 1d833b97 | Florent Chuffart | if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) { |
1691 | 1d833b97 | Florent Chuffart | print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep="")) |
1692 | 1d833b97 | Florent Chuffart | pb = list(strain1, strain2, chr, b1, e1, b2, e2) |
1693 | 1d833b97 | Florent Chuffart | pb |
1694 | 1d833b97 | Florent Chuffart | } else { |
1695 | 1d833b97 | Florent Chuffart | NULL |
1696 | 1d833b97 | Florent Chuffart | } |
1697 | 1d833b97 | Florent Chuffart | }) |
1698 | 1d833b97 | Florent Chuffart | }) |
1699 | 1d833b97 | Florent Chuffart | return(pbs) |
1700 | 1d833b97 | Florent Chuffart | } |
1701 | 1d833b97 | Florent Chuffart | |
1702 | 1d833b97 | Florent Chuffart | }, ex=function(){ |
1703 | 1d833b97 | Florent Chuffart | source("src/nucleo_miner/yeast_strain_conversion.R"); |
1704 | 1d833b97 | Florent Chuffart | pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE) |
1705 | 1d833b97 | Florent Chuffart | pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE) |
1706 | 1d833b97 | Florent Chuffart | pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE) |
1707 | 1d833b97 | Florent Chuffart | pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE) |
1708 | 1d833b97 | Florent Chuffart | pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE) |
1709 | 1d833b97 | Florent Chuffart | pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE) |
1710 | 1d833b97 | Florent Chuffart | }) |
1711 | 1d833b97 | Florent Chuffart | |
1712 | 1d833b97 | Florent Chuffart | |
1713 | 1d833b97 | Florent Chuffart | res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) |
1714 | 1d833b97 | Florent Chuffart | if (!is.null(res)) { |
1715 | 1d833b97 | Florent Chuffart | df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE) |
1716 | 1d833b97 | Florent Chuffart | interval = df_res[1,] |
1717 | 1d833b97 | Florent Chuffart | inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7))) |
1718 | 1d833b97 | Florent Chuffart | inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7))) |
1719 | 1d833b97 | Florent Chuffart | # print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2])) |
1720 | 1d833b97 | Florent Chuffart | new_roi1 = trans_roi |
1721 | 1d833b97 | Florent Chuffart | new_roi2 = trans_roi |
1722 | 1d833b97 | Florent Chuffart | new_roi1$begin = lower_bound |
1723 | 1d833b97 | Florent Chuffart | new_roi1$end = inter_min - 1 |
1724 | 1d833b97 | Florent Chuffart | new_roi1$length = new_roi1$end - new_roi1$begin + 1 |
1725 | 1d833b97 | Florent Chuffart | new_roi2$begin = inter_max + 1 |
1726 | 1d833b97 | Florent Chuffart | new_roi2$end = upper_bound |
1727 | 1d833b97 | Florent Chuffart | new_roi2$length = new_roi2$end - new_roi2$begin + 1 |
1728 | 1d833b97 | Florent Chuffart | if (new_roi1$length > min_cur_width) { |
1729 | 8e9facd8 | Florent Chuffart | BY_roi = translate_roi(new_roi1, "BY", config = config) |
1730 | 1d833b97 | Florent Chuffart | tmp_BY_roi = BY_roi |
1731 | 1d833b97 | Florent Chuffart | BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
1732 | 1d833b97 | Florent Chuffart | BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
1733 | 1d833b97 | Florent Chuffart | BY_roi$length = abs(BY_roi$length) |
1734 | 1d833b97 | Florent Chuffart | to_be_check_again = dfadd(to_be_check_again, BY_roi) |
1735 | 1d833b97 | Florent Chuffart | } |
1736 | 1d833b97 | Florent Chuffart | if (new_roi2$length > min_cur_width) { |
1737 | 8e9facd8 | Florent Chuffart | BY_roi = translate_roi(new_roi2, "BY", config = config) |
1738 | 1d833b97 | Florent Chuffart | tmp_BY_roi = BY_roi |
1739 | 1d833b97 | Florent Chuffart | BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
1740 | 1d833b97 | Florent Chuffart | BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
1741 | 1d833b97 | Florent Chuffart | BY_roi$length = abs(BY_roi$length) |
1742 | 1d833b97 | Florent Chuffart | to_be_check_again = dfadd(to_be_check_again, BY_roi) |
1743 | 1d833b97 | Florent Chuffart | } |
1744 | 1d833b97 | Florent Chuffart | if (to_be_check_roi_index < length(to_be_check_rois[,1])) { |
1745 | 1d833b97 | Florent Chuffart | for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) { |
1746 | 1d833b97 | Florent Chuffart | to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,]) |
1747 | 1d833b97 | Florent Chuffart | } |
1748 | 1d833b97 | Florent Chuffart | } |
1749 | 1d833b97 | Florent Chuffart | NEED_RERUN = TRUE |
1750 | 1d833b97 | Florent Chuffart | break |
1751 | 1d833b97 | Florent Chuffart | } |
1752 | 1d833b97 | Florent Chuffart | } |
1753 | 1d833b97 | Florent Chuffart | if (NEED_RERUN) { |
1754 | 1d833b97 | Florent Chuffart | to_be_check_rois = to_be_check_again |
1755 | 1d833b97 | Florent Chuffart | break |
1756 | 1d833b97 | Florent Chuffart | } |
1757 | 1d833b97 | Florent Chuffart | } |
1758 | 1d833b97 | Florent Chuffart | } |
1759 | 1d833b97 | Florent Chuffart | |
1760 | 1d833b97 | Florent Chuffart | checked_rois = to_be_check_rois |
1761 | 1d833b97 | Florent Chuffart | for (checked_roi_index in 1:length(checked_rois[,1])) { |
1762 | 1d833b97 | Florent Chuffart | rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,]) |
1763 | 1d833b97 | Florent Chuffart | } |
1764 | 1d833b97 | Florent Chuffart | } |
1765 | 1d833b97 | Florent Chuffart | |
1766 | 729c934e | Florent Chuffart | |
1767 | 1d833b97 | Florent Chuffart | print(length(rois_3rd_round[,1])) |
1768 | 1d833b97 | Florent Chuffart | print(sum(rois_3rd_round$length)) |
1769 | 1d833b97 | Florent Chuffart | rois = rois_3rd_round |
1770 | 1d833b97 | Florent Chuffart | |
1771 | 1d833b97 | Florent Chuffart | |
1772 | 1d833b97 | Florent Chuffart | if (plot) { |
1773 | 1d833b97 | Florent Chuffart | print(paste(length(rois$chr), "area of interest.")) |
1774 | 1d833b97 | Florent Chuffart | # Plot rois |
1775 | 8e9facd8 | Florent Chuffart | genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[["BY"]], "fasta") |
1776 | 1d833b97 | Florent Chuffart | plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))})))) |
1777 | 1d833b97 | Florent Chuffart | for (name in names(genome)) { |
1778 | 1d833b97 | Florent Chuffart | if (TRUE) { |
1779 | 1d833b97 | Florent Chuffart | chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="") |
1780 | 1d833b97 | Florent Chuffart | } else { |
1781 | 1d833b97 | Florent Chuffart | chr_ref = name |
1782 | 1d833b97 | Florent Chuffart | } |
1783 | 1d833b97 | Florent Chuffart | y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]]) |
1784 | 1d833b97 | Florent Chuffart | lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev)) |
1785 | 1d833b97 | Florent Chuffart | text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref) |
1786 | 1d833b97 | Florent Chuffart | } |
1787 | 1d833b97 | Florent Chuffart | col=1 |
1788 | 1d833b97 | Florent Chuffart | for (roi_index in 1:length(rois$chr)) { |
1789 | 1d833b97 | Florent Chuffart | roi = rois[roi_index,] |
1790 | 1d833b97 | Florent Chuffart | y_lev = as.integer(roi$chr) + 0.3 |
1791 | 1d833b97 | Florent Chuffart | lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col) |
1792 | 1d833b97 | Florent Chuffart | text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index) |
1793 | 1d833b97 | Florent Chuffart | col = col + 1 |
1794 | 1d833b97 | Florent Chuffart | } |
1795 | 1d833b97 | Florent Chuffart | } |
1796 | 1d833b97 | Florent Chuffart | return (rois) |
1797 | 1d833b97 | Florent Chuffart | } |
1798 | 1d833b97 | Florent Chuffart | |
1799 | 1d833b97 | Florent Chuffart | |
1800 | 1d833b97 | Florent Chuffart | |
1801 | 1d833b97 | Florent Chuffart | |
1802 | 61914235 | Florent Chuffart | build_replicates = structure(function(# Stage replicates data |
1803 | 61914235 | Florent Chuffart | ### This function loads in memory data corresponding to the given experiments. |
1804 | 61914235 | Florent Chuffart | expe, ##<< a list of vector corresponding to vector of replicates. |
1805 | 61914235 | Florent Chuffart | roi, ##<< the region that we are interested in. |
1806 | 61914235 | Florent Chuffart | only_fetch=FALSE, ##<< filter or not inputs. |
1807 | 61914235 | Florent Chuffart | get_genome=FALSE,##<< Load or not corresponding genome. |
1808 | 61914235 | Florent Chuffart | all_samples, ##<< Global list of samples. |
1809 | 61914235 | Florent Chuffart | config=NULL ##<< GLOBAL config variable. |
1810 | 61914235 | Florent Chuffart | ) { |
1811 | 61914235 | Florent Chuffart | build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) { |
1812 | 61914235 | Florent Chuffart | samples=list() |
1813 | 61914235 | Florent Chuffart | for (i in samples_ids) { |
1814 | 61914235 | Florent Chuffart | sample = as.list(all_samples[all_samples$id==i,]) |
1815 | a0b91fee | Florent Chuffart | sample$orig_roi = roi |
1816 | 8e9facd8 | Florent Chuffart | sample$roi = translate_roi(roi, sample$strain, config = config) |
1817 | 61914235 | Florent Chuffart | if (get_genome) { |
1818 | 61914235 | Florent Chuffart | # Get Genome |
1819 | 61914235 | Florent Chuffart | fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]] |
1820 | 61914235 | Florent Chuffart | sample$roi$genome = get_content(fasta_ref_filename, "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
1821 | 61914235 | Florent Chuffart | } |
1822 | 61914235 | Florent Chuffart | # Get inputs |
1823 | 61914235 | Florent Chuffart | sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="") |
1824 | 61914235 | Florent Chuffart | sample$inputs = get_content(sample_inputs_filename, "table", stringsAsFactors=FALSE) |
1825 | 61914235 | Florent Chuffart | sample$total_reads = sum(sample$inputs[,4]) |
1826 | 61914235 | Florent Chuffart | if (!only_fetch) { |
1827 | b8a95426 | Florent Chuffart | sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE) |
1828 | 61914235 | Florent Chuffart | } |
1829 | 61914235 | Florent Chuffart | # Get TF outputs for Mnase_Seq samples |
1830 | 61914235 | Florent Chuffart | if (sample$marker == "Mnase_Seq" & get_ouputs) { |
1831 | 61914235 | Florent Chuffart | sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="") |
1832 | 61914235 | Florent Chuffart | sample$outputs = get_content(sample_outputs_filename, "table", header=TRUE, sep="\t") |
1833 | 61914235 | Florent Chuffart | if (!only_fetch) { |
1834 | 61914235 | Florent Chuffart | sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300) |
1835 | 61914235 | Florent Chuffart | } |
1836 | 61914235 | Florent Chuffart | } |
1837 | 61914235 | Florent Chuffart | samples[[length(samples) + 1]] = sample |
1838 | 61914235 | Florent Chuffart | } |
1839 | 61914235 | Florent Chuffart | return(samples) |
1840 | 61914235 | Florent Chuffart | } |
1841 | 61914235 | Florent Chuffart | replicates = list() |
1842 | 61914235 | Florent Chuffart | for(samples_ids in expe) { |
1843 | 61914235 | Florent Chuffart | samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples) |
1844 | 61914235 | Florent Chuffart | replicates[[length(replicates) + 1]] = samples |
1845 | 61914235 | Florent Chuffart | } |
1846 | 61914235 | Florent Chuffart | return(replicates) |
1847 | 61914235 | Florent Chuffart | }, ex = function() { |
1848 | 61914235 | Florent Chuffart | # library(rjson) |
1849 | 61914235 | Florent Chuffart | # library(nucleominer) |
1850 | 61914235 | Florent Chuffart | # |
1851 | 61914235 | Florent Chuffart | # # Read config file |
1852 | 61914235 | Florent Chuffart | # json_conf_file = "nucleo_miner_config.json" |
1853 | 61914235 | Florent Chuffart | # config = fromJSON(paste(readLines(json_conf_file), collapse="")) |
1854 | 61914235 | Florent Chuffart | # # Read sample file |
1855 | 61914235 | Florent Chuffart | # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE) |
1856 | 61914235 | Florent Chuffart | # # here are the sample ids in a list |
1857 | 61914235 | Florent Chuffart | # expes = list(c(1)) |
1858 | 61914235 | Florent Chuffart | # # here is the region that we wnt to see the coverage |
1859 | 61914235 | Florent Chuffart | # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY") |
1860 | 61914235 | Florent Chuffart | # # it displays the corverage |
1861 | 61914235 | Florent Chuffart | # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config) |
1862 | 61914235 | Florent Chuffart | # out = watch_samples(replicates, config$READ_LENGTH, |
1863 | 61914235 | Florent Chuffart | # plot_coverage = TRUE, |
1864 | 61914235 | Florent Chuffart | # plot_squared_reads = FALSE, |
1865 | 61914235 | Florent Chuffart | # plot_ref_genome = FALSE, |
1866 | 61914235 | Florent Chuffart | # plot_arrow_raw_reads = FALSE, |
1867 | 61914235 | Florent Chuffart | # plot_arrow_nuc_reads = FALSE, |
1868 | 61914235 | Florent Chuffart | # plot_gaussian_reads = FALSE, |
1869 | 61914235 | Florent Chuffart | # plot_gaussian_unified_reads = FALSE, |
1870 | 61914235 | Florent Chuffart | # plot_ellipse_nucs = FALSE, |
1871 | 61914235 | Florent Chuffart | # plot_wp_nucs = FALSE, |
1872 | 61914235 | Florent Chuffart | # plot_wp_nuc_model = FALSE, |
1873 | 61914235 | Florent Chuffart | # plot_common_nucs = FALSE, |
1874 | 61914235 | Florent Chuffart | # height = 50) |
1875 | 61914235 | Florent Chuffart | }) |
1876 | 61914235 | Florent Chuffart | |
1877 | 61914235 | Florent Chuffart | |
1878 | 61914235 | Florent Chuffart | |
1879 | 61914235 | Florent Chuffart | |
1880 | 61914235 | Florent Chuffart | |
1881 | 61914235 | Florent Chuffart | |
1882 | 61914235 | Florent Chuffart | |
1883 | 61914235 | Florent Chuffart | |
1884 | 61914235 | Florent Chuffart | |
1885 | 61914235 | Florent Chuffart | |
1886 | 61914235 | Florent Chuffart | |
1887 | 61914235 | Florent Chuffart | |
1888 | 61914235 | Florent Chuffart | |
1889 | 61914235 | Florent Chuffart | |
1890 | 61914235 | Florent Chuffart | |
1891 | 61914235 | Florent Chuffart | |
1892 | 61914235 | Florent Chuffart | |
1893 | 61914235 | Florent Chuffart | |
1894 | 61914235 | Florent Chuffart | |
1895 | 61914235 | Florent Chuffart | |
1896 | 61914235 | Florent Chuffart | |
1897 | 61914235 | Florent Chuffart | |
1898 | 61914235 | Florent Chuffart | |
1899 | 61914235 | Florent Chuffart | |
1900 | 61914235 | Florent Chuffart | |
1901 | 61914235 | Florent Chuffart | |
1902 | 61914235 | Florent Chuffart | |
1903 | 61914235 | Florent Chuffart | |
1904 | 61914235 | Florent Chuffart | |
1905 | 61914235 | Florent Chuffart | |
1906 | 61914235 | Florent Chuffart | |
1907 | 61914235 | Florent Chuffart | |
1908 | 61914235 | Florent Chuffart | |
1909 | 1d833b97 | Florent Chuffart | |
1910 | 1d833b97 | Florent Chuffart | |
1911 | 1d833b97 | Florent Chuffart | |
1912 | 1d833b97 | Florent Chuffart | |
1913 | 1d833b97 | Florent Chuffart | |
1914 | 1d833b97 | Florent Chuffart | |
1915 | 1d833b97 | Florent Chuffart | |
1916 | 1d833b97 | Florent Chuffart | |
1917 | 1d833b97 | Florent Chuffart | |
1918 | 1d833b97 | Florent Chuffart | |
1919 | 1d833b97 | Florent Chuffart | |
1920 | 1d833b97 | Florent Chuffart | |
1921 | 1d833b97 | Florent Chuffart | |
1922 | 1d833b97 | Florent Chuffart | |
1923 | 1d833b97 | Florent Chuffart | |
1924 | 1d833b97 | Florent Chuffart | |
1925 | 1d833b97 | Florent Chuffart | |
1926 | 1d833b97 | Florent Chuffart | |
1927 | 1d833b97 | Florent Chuffart | |
1928 | 1d833b97 | Florent Chuffart | |
1929 | 1d833b97 | Florent Chuffart | |
1930 | 1d833b97 | Florent Chuffart | |
1931 | 1d833b97 | Florent Chuffart | |
1932 | 1d833b97 | Florent Chuffart | |
1933 | 1d833b97 | Florent Chuffart | |
1934 | 1d833b97 | Florent Chuffart | |
1935 | 1d833b97 | Florent Chuffart | |
1936 | 1d833b97 | Florent Chuffart | |
1937 | 1d833b97 | Florent Chuffart | |
1938 | 1d833b97 | Florent Chuffart | |
1939 | 1d833b97 | Florent Chuffart | |
1940 | 1d833b97 | Florent Chuffart | |
1941 | 1d833b97 | Florent Chuffart | |
1942 | 1d833b97 | Florent Chuffart | |
1943 | 6b5a528c | Florent Chuffart | |
1944 | 6b5a528c | Florent Chuffart | |
1945 | 6b5a528c | Florent Chuffart | |
1946 | 6b5a528c | Florent Chuffart | |
1947 | 6b5a528c | Florent Chuffart | |
1948 | 6b5a528c | Florent Chuffart | |
1949 | 6b5a528c | Florent Chuffart | |
1950 | 6b5a528c | Florent Chuffart | |
1951 | 6b5a528c | Florent Chuffart | |
1952 | 6b5a528c | Florent Chuffart | |
1953 | 6b5a528c | Florent Chuffart | |
1954 | 6b5a528c | Florent Chuffart | |
1955 | 6b5a528c | Florent Chuffart | |
1956 | 6b5a528c | Florent Chuffart | |
1957 | 6b5a528c | Florent Chuffart | |
1958 | 6b5a528c | Florent Chuffart | |
1959 | 6b5a528c | Florent Chuffart | |
1960 | 6b5a528c | Florent Chuffart | |
1961 | 6b5a528c | Florent Chuffart | |
1962 | 6b5a528c | Florent Chuffart | |
1963 | 6b5a528c | Florent Chuffart | |
1964 | 6b5a528c | Florent Chuffart | |
1965 | 6b5a528c | Florent Chuffart | |
1966 | 6b5a528c | Florent Chuffart | |
1967 | 6b5a528c | Florent Chuffart | |
1968 | 6b5a528c | Florent Chuffart | |
1969 | 6b5a528c | Florent Chuffart | |
1970 | 6b5a528c | Florent Chuffart | |
1971 | 6b5a528c | Florent Chuffart | |
1972 | 6b5a528c | Florent Chuffart | |
1973 | 6b5a528c | Florent Chuffart | |
1974 | 6b5a528c | Florent Chuffart | |
1975 | 6b5a528c | Florent Chuffart | |
1976 | 6b5a528c | Florent Chuffart | |
1977 | 6b5a528c | Florent Chuffart | perform_anovas = function(# Performaing ANOVAs |
1978 | 6b5a528c | Florent Chuffart | ### Counts reads and Performs ANOVAS for each common nucleosomes involved. |
1979 | 6b5a528c | Florent Chuffart | replicates, ##<< Set of replicates, each replicate is a list of samples (ideally 3). Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}. In the \emph{perform_anovas} contexte, we need 4 replicates (4 * (3 samples)): 2 strains * (1 marker + 1 input (Mnase_Seq)). |
1980 | 6b5a528c | Florent Chuffart | aligned_inter_strain_nucs, ##<< List of common nucleosomes. |
1981 | 6b5a528c | Florent Chuffart | inputs_name="Mnase_Seq", ##<< Name of the input. |
1982 | 6b5a528c | Florent Chuffart | plot_anova_boxes=FALSE ##<< Plot (or not) boxplot for each nuc. |
1983 | 6b5a528c | Florent Chuffart | ) { |
1984 | 6b5a528c | Florent Chuffart | anova_results = NULL |
1985 | 6b5a528c | Florent Chuffart | for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) { |
1986 | 6b5a528c | Florent Chuffart | inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,] |
1987 | 6b5a528c | Florent Chuffart | # counting reads |
1988 | 6b5a528c | Florent Chuffart | my_data = NULL |
1989 | 6b5a528c | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
1990 | 6b5a528c | Florent Chuffart | samples = replicates[[replicate_rank]] |
1991 | 6b5a528c | Florent Chuffart | strain = samples[[1]]$strain |
1992 | 6b5a528c | Florent Chuffart | marker = samples[[1]]$marker |
1993 | 6b5a528c | Florent Chuffart | for (sample_rank in 1:length(samples)) { |
1994 | 6b5a528c | Florent Chuffart | sample = samples[[sample_rank]] |
1995 | 6b5a528c | Florent Chuffart | nuc_width = as.integer(inter_strain_nuc[[paste("upper_bound_",strain,sep="")]]) - inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] |
1996 | 6b5a528c | Florent Chuffart | nuc_reads = filter_tf_inputs(sample$inputs, inter_strain_nuc[[paste("chr_",strain,sep="")]], inter_strain_nuc[[paste("lower_bound_",strain,sep="")]], inter_strain_nuc[[paste("upper_bound_",strain,sep="")]], nuc_width - 1) |
1997 | 6b5a528c | Florent Chuffart | indic = sum(nuc_reads[,4]) * 1000000/sample$total_reads / nuc_width |
1998 | 6b5a528c | Florent Chuffart | my_data = dfadd(my_data, list(strain = strain, marker = marker, indic = indic)) |
1999 | 6b5a528c | Florent Chuffart | } |
2000 | 6b5a528c | Florent Chuffart | } |
2001 | 6b5a528c | Florent Chuffart | my_data$strain = as.factor(my_data$strain) |
2002 | 6b5a528c | Florent Chuffart | my_data$marker = as.factor(my_data$marker) |
2003 | 6b5a528c | Florent Chuffart | |
2004 | 6b5a528c | Florent Chuffart | strain_ref1 = replicates[[1]][[1]]$strain |
2005 | 6b5a528c | Florent Chuffart | strain_ref2 = replicates[[2]][[1]]$strain |
2006 | 6b5a528c | Florent Chuffart | marker = replicates[[length(replicates)]][[1]]$marker |
2007 | 6b5a528c | Florent Chuffart | |
2008 | 6b5a528c | Florent Chuffart | # Collecting anova results |
2009 | 6b5a528c | Florent Chuffart | anova_result = list() |
2010 | 6b5a528c | Florent Chuffart | # nucs info |
2011 | 6b5a528c | Florent Chuffart | for (name in names(inter_strain_nuc)) { |
2012 | 6b5a528c | Florent Chuffart | anova_result[[name]] = inter_strain_nuc[[name]] |
2013 | 6b5a528c | Florent Chuffart | } |
2014 | 6b5a528c | Florent Chuffart | # |
2015 | 6b5a528c | Florent Chuffart | for (strain in c(strain_ref1, strain_ref2)) { |
2016 | 6b5a528c | Florent Chuffart | for (manip in c(inputs_name, marker)) { |
2017 | 6b5a528c | Florent Chuffart | replicat = 1 |
2018 | 6b5a528c | Florent Chuffart | for(indic in my_data[my_data$strain == strain & my_data$marker == manip,]$indic) { |
2019 | 6b5a528c | Florent Chuffart | anova_result[[paste(strain, manip, replicat, sep="_")]] = indic |
2020 | 6b5a528c | Florent Chuffart | replicat = replicat + 1 |
2021 | 6b5a528c | Florent Chuffart | } |
2022 | 6b5a528c | Florent Chuffart | } |
2023 | 6b5a528c | Florent Chuffart | } |
2024 | 6b5a528c | Florent Chuffart | |
2025 | 6b5a528c | Florent Chuffart | # Compute ANOVAs |
2026 | 6b5a528c | Florent Chuffart | mnase_data = my_data[my_data$marker == inputs_name,] |
2027 | 6b5a528c | Florent Chuffart | mnase_aov = aov(indic~strain,mnase_data) |
2028 | 6b5a528c | Florent Chuffart | mnase_effects = model.tables(mnase_aov)$tables |
2029 | 6b5a528c | Florent Chuffart | mnase_pvalues = summary(mnase_aov)[[1]]$Pr |
2030 | 6b5a528c | Florent Chuffart | # boxplot(indic~ma*st,mnase_data) |
2031 | 6b5a528c | Florent Chuffart | anova_result[["mnase_st"]] = mnase_effects$strain[1] |
2032 | 6b5a528c | Florent Chuffart | anova_result[["mnase_st_pvalue"]] = mnase_pvalues[1] |
2033 | 6b5a528c | Florent Chuffart | # |
2034 | 6b5a528c | Florent Chuffart | marker_data = my_data[my_data$marker == marker,] |
2035 | 6b5a528c | Florent Chuffart | marker_aov = aov(indic~strain,marker_data) |
2036 | 6b5a528c | Florent Chuffart | marker_effects = model.tables(marker_aov)$tables |
2037 | 6b5a528c | Florent Chuffart | marker_pvalues = summary(marker_aov)[[1]]$Pr |
2038 | 6b5a528c | Florent Chuffart | # boxplot(indic~ma*st,marker_data) |
2039 | 6b5a528c | Florent Chuffart | anova_result[["marker_st"]] = marker_effects$strain[1] |
2040 | 6b5a528c | Florent Chuffart | anova_result[["marker_st_pvalue"]] = marker_pvalues[1] |
2041 | 6b5a528c | Florent Chuffart | # |
2042 | 6b5a528c | Florent Chuffart | st1_data = my_data[my_data$strain == strain_ref1,] |
2043 | 6b5a528c | Florent Chuffart | st1_aov = aov(indic~marker,st1_data) |
2044 | 6b5a528c | Florent Chuffart | st1_effects = model.tables(st1_aov)$tables |
2045 | 6b5a528c | Florent Chuffart | st1_pvalues = summary(st1_aov)[[1]]$Pr |
2046 | 6b5a528c | Florent Chuffart | # boxplot(indic~ma*st,st1_data) |
2047 | 6b5a528c | Florent Chuffart | anova_result[["st1_ma"]]=st1_effects$ma[inputs_name] |
2048 | 6b5a528c | Florent Chuffart | anova_result[["st1_ma_pvalue"]]=st1_pvalues[1] |
2049 | 6b5a528c | Florent Chuffart | # |
2050 | 6b5a528c | Florent Chuffart | st2_data = my_data[my_data$strain == strain_ref2,] |
2051 | 6b5a528c | Florent Chuffart | st2_aov = aov(indic~marker,st2_data) |
2052 | 6b5a528c | Florent Chuffart | st2_effects = model.tables(st2_aov)$tables |
2053 | 6b5a528c | Florent Chuffart | st2_pvalues = summary(st2_aov)[[1]]$Pr |
2054 | 6b5a528c | Florent Chuffart | # boxplot(indic~ma*st,st2_data) |
2055 | 6b5a528c | Florent Chuffart | anova_result[["st2_ma"]]=st2_effects$ma[inputs_name] |
2056 | 6b5a528c | Florent Chuffart | anova_result[["st2_ma_pvalue"]]=st2_pvalues[1] |
2057 | 6b5a528c | Florent Chuffart | # |
2058 | 6b5a528c | Florent Chuffart | correl_data = my_data |
2059 | 6b5a528c | Florent Chuffart | correl_aov = aov(indic~strain*marker,correl_data) |
2060 | 6b5a528c | Florent Chuffart | correl_effects = model.tables(correl_aov)$tables |
2061 | 6b5a528c | Florent Chuffart | correl_pvalues = summary(correl_aov)[[1]]$Pr |
2062 | 6b5a528c | Florent Chuffart | if (plot_anova_boxes) { |
2063 | 6b5a528c | Florent Chuffart | x11() |
2064 | 6b5a528c | Florent Chuffart | boxplot(indic~marker*strain,correl_data) |
2065 | 6b5a528c | Florent Chuffart | } |
2066 | 6b5a528c | Florent Chuffart | anova_result[["correl_st"]]=correl_effects$strain[1] |
2067 | 6b5a528c | Florent Chuffart | anova_result[["correl_ma"]]=correl_effects$ma[inputs_name] |
2068 | 6b5a528c | Florent Chuffart | anova_result[["correl_st_ma"]]=correl_effects$"strain:marker"[1,1] |
2069 | 6b5a528c | Florent Chuffart | anova_result[["correl_st_pvalue"]]=correl_pvalues[1] |
2070 | 6b5a528c | Florent Chuffart | anova_result[["correl_ma_pvalue"]]=correl_pvalues[2] |
2071 | 6b5a528c | Florent Chuffart | anova_result[["correl_st_ma_pvalue"]]=correl_pvalues[3] |
2072 | 6b5a528c | Florent Chuffart | |
2073 | 6b5a528c | Florent Chuffart | anova_results = dfadd(anova_results, anova_result) |
2074 | 6b5a528c | Florent Chuffart | } |
2075 | 6b5a528c | Florent Chuffart | return(anova_results) |
2076 | 6b5a528c | Florent Chuffart | ### Returns ANOVA results and comunted reads. |
2077 | 6b5a528c | Florent Chuffart | } |
2078 | 6b5a528c | Florent Chuffart | |
2079 | 6b5a528c | Florent Chuffart | |
2080 | 6b5a528c | Florent Chuffart | watch_samples = function(# Watching analysis of samples |
2081 | 6b5a528c | Florent Chuffart | ### This function allows to view analysis for a particuler region of the genome. |
2082 | 6b5a528c | Florent Chuffart | replicates, ##<< replicates under the form... |
2083 | 6b5a528c | Florent Chuffart | read_length, ##<< length of the reads |
2084 | 6b5a528c | Florent Chuffart | plot_ref_genome = TRUE, ##<< Plot (or not) reference genome. |
2085 | 6b5a528c | Florent Chuffart | plot_arrow_raw_reads = TRUE, ##<< Plot (or not) arrows for raw reads. |
2086 | 6b5a528c | Florent Chuffart | plot_arrow_nuc_reads = TRUE, ##<< Plot (or not) arrows for reads aasiocied to a nucleosome. |
2087 | 6b5a528c | Florent Chuffart | plot_squared_reads = TRUE, ##<< Plot (or not) reads in the square fashion. |
2088 | 6b5a528c | Florent Chuffart | plot_coverage = FALSE, ##<< Plot (or not) reads in the covergae fashion. fashion. |
2089 | 6b5a528c | Florent Chuffart | plot_gaussian_reads = TRUE, ##<< Plot (or not) gaussian model of a F anf R reads. |
2090 | 6b5a528c | Florent Chuffart | plot_gaussian_unified_reads = TRUE, ##<< Plot (or not) gaussian model of a nuc. |
2091 | 6b5a528c | Florent Chuffart | plot_ellipse_nucs = TRUE, ##<< Plot (or not) ellipse for a nuc. |
2092 | 7646593d | Florent Chuffart | change_col = TRUE, ##<< Change the color of each nucleosome. |
2093 | 6b5a528c | Florent Chuffart | plot_wp_nucs = TRUE, ##<< Plot (or not) cluster of nucs |
2094 | 6b5a528c | Florent Chuffart | plot_wp_nuc_model = TRUE, ##<< Plot (or not) gaussian model for a cluster of nucs |
2095 | 6b5a528c | Florent Chuffart | plot_common_nucs = TRUE, ##<< Plot (or not) aligned reads. |
2096 | 6b5a528c | Florent Chuffart | plot_anovas = FALSE, ##<< Plot (or not) scatter for each nuc. |
2097 | 6b5a528c | Florent Chuffart | plot_anova_boxes = FALSE, ##<< Plot (or not) boxplot for each nuc. |
2098 | 6b5a528c | Florent Chuffart | plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples. |
2099 | 729c934e | Florent Chuffart | plot_chain = FALSE, ##<< Plot (or not) clusterised nuceosomes between mnase samples. |
2100 | 6b5a528c | Florent Chuffart | aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. |
2101 | 6b5a528c | Florent Chuffart | aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed. |
2102 | 1d833b97 | Florent Chuffart | height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis. |
2103 | 1d833b97 | Florent Chuffart | config=NULL ##<< GLOBAL config variable |
2104 | 6b5a528c | Florent Chuffart | ){ |
2105 | 6b5a528c | Florent Chuffart | returned_list = list() |
2106 | 6b5a528c | Florent Chuffart | # Computing global display parameters |
2107 | 6b5a528c | Florent Chuffart | if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) { |
2108 | 6b5a528c | Florent Chuffart | x_min_glo = replicates[[1]][[1]]$roi[["begin"]] |
2109 | 6b5a528c | Florent Chuffart | x_max_glo = replicates[[1]][[1]]$roi[["end"]] |
2110 | 6b5a528c | Florent Chuffart | } else { |
2111 | 6b5a528c | Florent Chuffart | x_min_glo = - replicates[[1]][[1]]$roi[["begin"]] |
2112 | 6b5a528c | Florent Chuffart | x_max_glo = - replicates[[1]][[1]]$roi[["end"]] |
2113 | 6b5a528c | Florent Chuffart | } |
2114 | 6b5a528c | Florent Chuffart | base_glo = 0 |
2115 | 6b5a528c | Florent Chuffart | nb_rank_glo = 0 |
2116 | 6b5a528c | Florent Chuffart | for (samples in replicates) { |
2117 | 6b5a528c | Florent Chuffart | nb_rank_glo = nb_rank_glo + length(samples) |
2118 | 6b5a528c | Florent Chuffart | } |
2119 | 6b5a528c | Florent Chuffart | ylim_glo = c(base_glo, base_glo + height * nb_rank_glo) |
2120 | 6b5a528c | Florent Chuffart | y_min_glo = min(ylim_glo) |
2121 | 6b5a528c | Florent Chuffart | y_max_glo = max(ylim_glo) |
2122 | 6b5a528c | Florent Chuffart | delta_y_glo = y_max_glo - y_min_glo |
2123 | 6b5a528c | Florent Chuffart | # Plot main frame |
2124 | 935a568c | Florent Chuffart | plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab="#reads (per million reads)", xlab=paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr), main="NucleoMiner2" ) |
2125 | 6b5a528c | Florent Chuffart | axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2)) |
2126 | 6b5a528c | Florent Chuffart | # Go |
2127 | 6b5a528c | Florent Chuffart | replicates_wp_nucs = list() |
2128 | 6b5a528c | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
2129 | 6b5a528c | Florent Chuffart | # Computing replicate parameters |
2130 | 6b5a528c | Florent Chuffart | nb_rank = length(samples) |
2131 | 6b5a528c | Florent Chuffart | base = (replicate_rank-1) * height * nb_rank |
2132 | 6b5a528c | Florent Chuffart | ylim = c(base, base + height * nb_rank) |
2133 | 6b5a528c | Florent Chuffart | y_min = min(ylim) |
2134 | 6b5a528c | Florent Chuffart | y_max = max(ylim) |
2135 | 6b5a528c | Florent Chuffart | delta_y = y_max - y_min |
2136 | 6b5a528c | Florent Chuffart | samples = replicates[[replicate_rank]] |
2137 | 6b5a528c | Florent Chuffart | for (sample_rank in 1:length(samples)) { |
2138 | 6b5a528c | Florent Chuffart | # computing sample parameters |
2139 | 6b5a528c | Florent Chuffart | sample = samples[[sample_rank]] |
2140 | 6b5a528c | Florent Chuffart | y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank |
2141 | 6b5a528c | Florent Chuffart | text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep="")) |
2142 | 6b5a528c | Florent Chuffart | |
2143 | 6b5a528c | Florent Chuffart | if (sample$roi[["begin"]] < sample$roi[["end"]]) { |
2144 | 6b5a528c | Florent Chuffart | x_min = sample$roi[["begin"]] |
2145 | 6b5a528c | Florent Chuffart | x_max = sample$roi[["end"]] |
2146 | 6b5a528c | Florent Chuffart | } else { |
2147 | 6b5a528c | Florent Chuffart | x_min = - sample$roi[["begin"]] |
2148 | 6b5a528c | Florent Chuffart | x_max = - sample$roi[["end"]] |
2149 | 6b5a528c | Florent Chuffart | } |
2150 | 6b5a528c | Florent Chuffart | shift = x_min_glo - x_min |
2151 | 6b5a528c | Florent Chuffart | # Plot Genome seq |
2152 | 6b5a528c | Florent Chuffart | if (plot_ref_genome) { |
2153 | 6b5a528c | Florent Chuffart | text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier") |
2154 | 6b5a528c | Florent Chuffart | } |
2155 | 6b5a528c | Florent Chuffart | # Plot reads |
2156 | 6b5a528c | Florent Chuffart | reads = sample$inputs |
2157 | 6b5a528c | Florent Chuffart | signs = sign_from_strand(reads[,3]) |
2158 | 6b5a528c | Florent Chuffart | if (plot_arrow_raw_reads) { |
2159 | 6b5a528c | Florent Chuffart | arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, |
2160 | 6b5a528c | Florent Chuffart | col=1, length=0.15/nb_rank) |
2161 | 6b5a528c | Florent Chuffart | } |
2162 | 6b5a528c | Florent Chuffart | if (plot_squared_reads) { |
2163 | 6b5a528c | Florent Chuffart | # require(plotrix) |
2164 | 6b5a528c | Florent Chuffart | rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift, y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0) |
2165 | 6b5a528c | Florent Chuffart | } |
2166 | 6b5a528c | Florent Chuffart | if (plot_coverage) { |
2167 | a52f85d7 | Florent Chuffart | if (length(reads[,1]) != 0) { |
2168 | a52f85d7 | Florent Chuffart | step_h = sign(x_min) * signs * reads[,4] |
2169 | a52f85d7 | Florent Chuffart | step_b = sign(x_min) * reads[,2] + shift |
2170 | a52f85d7 | Florent Chuffart | step_e = sign(x_min) * (reads[,2] + signs * 150) + shift |
2171 | a52f85d7 | Florent Chuffart | steps_x = min(step_b, step_e):max(step_b, step_e) |
2172 | a52f85d7 | Florent Chuffart | steps_y = rep(0, length(steps_x)) |
2173 | a52f85d7 | Florent Chuffart | for (i in 1:length(step_h)) { |
2174 | a52f85d7 | Florent Chuffart | steps_y[which(steps_x==min(step_b[i], step_e[i]))] = steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i]) |
2175 | a52f85d7 | Florent Chuffart | steps_y[which(steps_x==max(step_b[i], step_e[i]))] = steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i]) |
2176 | a52f85d7 | Florent Chuffart | } |
2177 | a52f85d7 | Florent Chuffart | tmp_index = which(steps_y != 0) |
2178 | a52f85d7 | Florent Chuffart | steps_x = steps_x[tmp_index] |
2179 | a52f85d7 | Florent Chuffart | steps_y = steps_y[tmp_index] |
2180 | a52f85d7 | Florent Chuffart | tmp_current_level = 0 |
2181 | a52f85d7 | Florent Chuffart | for (i in 1:length(steps_y)) { |
2182 | a52f85d7 | Florent Chuffart | steps_y[i] = tmp_current_level + steps_y[i] |
2183 | a52f85d7 | Florent Chuffart | tmp_current_level = steps_y[i] |
2184 | a52f85d7 | Florent Chuffart | } |
2185 | a52f85d7 | Florent Chuffart | steps_y = c(0, steps_y) |
2186 | a52f85d7 | Florent Chuffart | steps_y = steps_y * 1000000/sample$total_reads |
2187 | a52f85d7 | Florent Chuffart | } else { |
2188 | a52f85d7 | Florent Chuffart | steps_y = c(0, 0, 0) |
2189 | 1e310114 | Florent Chuffart | steps_x = c(x_min, x_max) |
2190 | 6b5a528c | Florent Chuffart | } |
2191 | 0d1475ac | Florent Chuffart | # print(steps_x) |
2192 | 0d1475ac | Florent Chuffart | # print(steps_y) |
2193 | 6b5a528c | Florent Chuffart | lines(stepfun(steps_x, steps_y + y_lev), pch="") |
2194 | 6b5a528c | Florent Chuffart | abline(y_lev,0) |
2195 | 6b5a528c | Florent Chuffart | returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y) |
2196 | 6b5a528c | Florent Chuffart | } |
2197 | 6b5a528c | Florent Chuffart | # Plot nucs |
2198 | 6b5a528c | Florent Chuffart | if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) { |
2199 | 6b5a528c | Florent Chuffart | nucs = sample$outputs |
2200 | 6b5a528c | Florent Chuffart | if (length(nucs$center) > 0) { |
2201 | 6b5a528c | Florent Chuffart | col = 1 |
2202 | 6b5a528c | Florent Chuffart | for (i in 1:length(nucs$center)) { |
2203 | 7646593d | Florent Chuffart | if (change_col) { |
2204 | 7646593d | Florent Chuffart | col = col + 1 |
2205 | 7646593d | Florent Chuffart | } |
2206 | 6b5a528c | Florent Chuffart | nuc = nucs[i,] |
2207 | 6b5a528c | Florent Chuffart | involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width) |
2208 | 6b5a528c | Florent Chuffart | involved_signs = apply(t(involved_reads[,3]), 2, function(strand) { if (strand == "F") return(1) else return(-1)}) |
2209 | 6b5a528c | Florent Chuffart | total_involved_reads = sum(involved_reads[,4]) |
2210 | 6b5a528c | Florent Chuffart | if (plot_arrow_nuc_reads ) { |
2211 | 6b5a528c | Florent Chuffart | arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, |
2212 | 6b5a528c | Florent Chuffart | col=col, length=0.15/nb_rank) |
2213 | 6b5a528c | Florent Chuffart | } |
2214 | 6b5a528c | Florent Chuffart | if (plot_gaussian_reads | plot_gaussian_unified_reads) { |
2215 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(involved_reads, nuc$width) |
2216 | 6b5a528c | Florent Chuffart | delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width) |
2217 | 6b5a528c | Florent Chuffart | } |
2218 | 6b5a528c | Florent Chuffart | if (plot_gaussian_reads ) { |
2219 | 6b5a528c | Florent Chuffart | flatted_reads = flat_reads(involved_reads, nuc$width) |
2220 | 6b5a528c | Florent Chuffart | delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width) |
2221 | 883439d6 | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col) |
2222 | 883439d6 | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col) |
2223 | 6b5a528c | Florent Chuffart | } |
2224 | 6b5a528c | Florent Chuffart | if (plot_gaussian_unified_reads ) { |
2225 | 883439d6 | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=2) |
2226 | 6b5a528c | Florent Chuffart | } |
2227 | 6b5a528c | Florent Chuffart | if (plot_ellipse_nucs) { |
2228 | 6b5a528c | Florent Chuffart | # require(plotrix) |
2229 | 883439d6 | Florent Chuffart | draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col) |
2230 | 6b5a528c | Florent Chuffart | } |
2231 | 6b5a528c | Florent Chuffart | } |
2232 | 6b5a528c | Florent Chuffart | } else { |
2233 | 6b5a528c | Florent Chuffart | print("WARNING! No nucs to print.") |
2234 | 6b5a528c | Florent Chuffart | } |
2235 | 6b5a528c | Florent Chuffart | } |
2236 | 6b5a528c | Florent Chuffart | } |
2237 | 729c934e | Florent Chuffart | |
2238 | 6b5a528c | Florent Chuffart | # Plot wp nucs |
2239 | 729c934e | Florent Chuffart | if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs | plot_chain)) { |
2240 | 6b5a528c | Florent Chuffart | if (samples[[1]]$marker == "Mnase_Seq") { |
2241 | 7646593d | Florent Chuffart | if (is.null(aggregated_intra_strain_nucs)) { |
2242 | 6b5a528c | Florent Chuffart | wp_nucs = aggregate_intra_strain_nucs(samples)[[1]] |
2243 | 6b5a528c | Florent Chuffart | } else { |
2244 | 6b5a528c | Florent Chuffart | wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]] |
2245 | 6b5a528c | Florent Chuffart | } |
2246 | 6b5a528c | Florent Chuffart | } else { |
2247 | 6b5a528c | Florent Chuffart | wp_nucs = replicates_wp_nucs[[replicate_rank-2]] |
2248 | 6b5a528c | Florent Chuffart | } |
2249 | 729c934e | Florent Chuffart | if (plot_chain) { |
2250 | 729c934e | Florent Chuffart | tf_nucs = lapply(wp_nucs, function(nuc) { |
2251 | 729c934e | Florent Chuffart | bar = apply(t(nuc$nucs), 2, function(tmp_nuc){ |
2252 | 729c934e | Florent Chuffart | tmp_nuc = tmp_nuc[[1]] |
2253 | 729c934e | Florent Chuffart | tmp_nuc$inputs = NULL |
2254 | 729c934e | Florent Chuffart | tmp_nuc$original_reads = NULL |
2255 | 729c934e | Florent Chuffart | tmp_nuc$wp = nuc$wp |
2256 | 729c934e | Florent Chuffart | # print(tmp_nuc) |
2257 | 729c934e | Florent Chuffart | return(tmp_nuc) |
2258 | dc58ee6c | Florent Chuffart | }) |
2259 | dc58ee6c | Florent Chuffart | return(do.call(rbind, bar)) |
2260 | dc58ee6c | Florent Chuffart | }) |
2261 | 729c934e | Florent Chuffart | tf_nucs = data.frame(do.call(rbind, tf_nucs)) |
2262 | dc58ee6c | Florent Chuffart | tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 |
2263 | dc58ee6c | Florent Chuffart | tmp_y = y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank |
2264 | dc58ee6c | Florent Chuffart | tmp_y_prev = tmp_y[-length(tmp_y)] |
2265 | dc58ee6c | Florent Chuffart | tmp_y_next = tmp_y[-1] |
2266 | dc58ee6c | Florent Chuffart | tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2 |
2267 | dc58ee6c | Florent Chuffart | tmp_track = unlist(tf_nucs$track) |
2268 | dc58ee6c | Florent Chuffart | tmp_track_prev = tmp_track[-length(tmp_track)] |
2269 | dc58ee6c | Florent Chuffart | tmp_track_next = tmp_track[-1] |
2270 | dc58ee6c | Florent Chuffart | tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25 |
2271 | dc58ee6c | Florent Chuffart | tmp_x_prev = tmp_x[-length(tmp_x)] |
2272 | dc58ee6c | Florent Chuffart | tmp_x_next = tmp_x[-1] |
2273 | dc58ee6c | Florent Chuffart | need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50}) |
2274 | dc58ee6c | Florent Chuffart | tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift |
2275 | dc58ee6c | Florent Chuffart | tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2) |
2276 | dc58ee6c | Florent Chuffart | new_tmp_x = c() |
2277 | dc58ee6c | Florent Chuffart | new_tmp_y = c() |
2278 | dc58ee6c | Florent Chuffart | index_odd = 1:length(tmp_x) * 2 - 1 |
2279 | dc58ee6c | Florent Chuffart | index_even = (1:(length(tmp_x) - 1)) * 2 |
2280 | dc58ee6c | Florent Chuffart | new_tmp_x[index_odd] = tmp_x |
2281 | dc58ee6c | Florent Chuffart | new_tmp_y[index_odd] = tmp_y |
2282 | dc58ee6c | Florent Chuffart | new_tmp_x[index_even] = tmp_x_inter |
2283 | dc58ee6c | Florent Chuffart | new_tmp_y[index_even] = tmp_y_inter |
2284 | dc58ee6c | Florent Chuffart | lines(new_tmp_x , new_tmp_y, lw=2) |
2285 | dc58ee6c | Florent Chuffart | points(tmp_x, tmp_y, cex=4, pch=16, col="white") |
2286 | dc58ee6c | Florent Chuffart | points(tmp_x, tmp_y, cex=4, lw=2) |
2287 | dc58ee6c | Florent Chuffart | text(tmp_x, tmp_y, 1:nrow(tf_nucs)) |
2288 | 7646593d | Florent Chuffart | text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, srt=90, cex=0.9, bg = "yellow")#, col=(tmp_lod_inter < 20) + 2) |
2289 | 729c934e | Florent Chuffart | } |
2290 | 729c934e | Florent Chuffart | |
2291 | 729c934e | Florent Chuffart | if (plot_wp_nucs | plot_common_nucs ) { |
2292 | 729c934e | Florent Chuffart | replicates_wp_nucs[[replicate_rank]] = wp_nucs |
2293 | 729c934e | Florent Chuffart | for (wp_nuc in wp_nucs) { |
2294 | 729c934e | Florent Chuffart | if (wp_nuc$wp){ |
2295 | 729c934e | Florent Chuffart | rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1) |
2296 | 729c934e | Florent Chuffart | all_original_reads = c() |
2297 | 729c934e | Florent Chuffart | for(initial_nuc in wp_nuc$nucs) { |
2298 | 729c934e | Florent Chuffart | all_original_reads = c(all_original_reads, initial_nuc$original_reads) |
2299 | 729c934e | Florent Chuffart | } |
2300 | 729c934e | Florent Chuffart | delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound |
2301 | 729c934e | Florent Chuffart | if (FALSE) { |
2302 | 729c934e | Florent Chuffart | rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1) |
2303 | 729c934e | Florent Chuffart | } |
2304 | 729c934e | Florent Chuffart | if (plot_wp_nuc_model) { |
2305 | 729c934e | Florent Chuffart | lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1) |
2306 | 729c934e | Florent Chuffart | } |
2307 | 729c934e | Florent Chuffart | } |
2308 | 729c934e | Florent Chuffart | } |
2309 | 729c934e | Florent Chuffart | } |
2310 | 6b5a528c | Florent Chuffart | } |
2311 | 6b5a528c | Florent Chuffart | } |
2312 | 6b5a528c | Florent Chuffart | |
2313 | 6b5a528c | Florent Chuffart | if (plot_common_nucs | plot_anovas | plot_anova_boxes) { |
2314 | 6b5a528c | Florent Chuffart | if (is.null(aligned_inter_strain_nucs)) { |
2315 | 1d833b97 | Florent Chuffart | aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]] |
2316 | 6b5a528c | Florent Chuffart | } |
2317 | 6b5a528c | Florent Chuffart | if (plot_common_nucs) { |
2318 | 6b5a528c | Florent Chuffart | #Plot common wp nucs |
2319 | 6b5a528c | Florent Chuffart | mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list() |
2320 | 6b5a528c | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
2321 | 6b5a528c | Florent Chuffart | nb_rank[[replicate_rank]] = length(samples) |
2322 | 6b5a528c | Florent Chuffart | base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]] |
2323 | 6b5a528c | Florent Chuffart | ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]]) |
2324 | 6b5a528c | Florent Chuffart | y_min[[replicate_rank]] = min(ylim[[replicate_rank]]) |
2325 | 6b5a528c | Florent Chuffart | y_max[[replicate_rank]] = max(ylim[[replicate_rank]]) |
2326 | 6b5a528c | Florent Chuffart | delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]] |
2327 | 6b5a528c | Florent Chuffart | mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2 |
2328 | 6b5a528c | Florent Chuffart | |
2329 | 6b5a528c | Florent Chuffart | samples = replicates[[replicate_rank]] |
2330 | 6b5a528c | Florent Chuffart | for (sample_rank in 1:length(samples)) { |
2331 | 6b5a528c | Florent Chuffart | sample = samples[[sample_rank]] |
2332 | 6b5a528c | Florent Chuffart | y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]] |
2333 | 6b5a528c | Florent Chuffart | if (sample$roi[["begin"]] < sample$roi[["end"]]) { |
2334 | 6b5a528c | Florent Chuffart | x_min[[replicate_rank]] = sample$roi[["begin"]] |
2335 | 6b5a528c | Florent Chuffart | x_max[[replicate_rank]] = sample$roi[["end"]] |
2336 | 6b5a528c | Florent Chuffart | } else { |
2337 | 6b5a528c | Florent Chuffart | x_min[[replicate_rank]] = - sample$roi[["begin"]] |
2338 | 6b5a528c | Florent Chuffart | x_max[[replicate_rank]] = - sample$roi[["end"]] |
2339 | 6b5a528c | Florent Chuffart | } |
2340 | 6b5a528c | Florent Chuffart | shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]] |
2341 | 6b5a528c | Florent Chuffart | } |
2342 | 6b5a528c | Florent Chuffart | } |
2343 | 6b5a528c | Florent Chuffart | for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) { |
2344 | 6b5a528c | Florent Chuffart | inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,] |
2345 | 6b5a528c | Florent Chuffart | tmp_xs = tmp_ys = c() |
2346 | 6b5a528c | Florent Chuffart | for (replicate_rank in 1:length(replicates)) { |
2347 | 6b5a528c | Florent Chuffart | samples = replicates[[replicate_rank]] |
2348 | 6b5a528c | Florent Chuffart | strain = samples[[1]]$strain |
2349 | 6b5a528c | Florent Chuffart | tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]]) |
2350 | 6b5a528c | Florent Chuffart | tmp_ys = c(tmp_ys, mid_y[[replicate_rank]]) |
2351 | 6b5a528c | Florent Chuffart | } |
2352 | 6b5a528c | Florent Chuffart | lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]]), cex=dev.size()[1]*200/(x_max[[1]]-x_min[[1]]), pch=19) |
2353 | 6b5a528c | Florent Chuffart | } |
2354 | 6b5a528c | Florent Chuffart | } |
2355 | 6b5a528c | Florent Chuffart | if (plot_anovas | plot_anova_boxes) { |
2356 | 6b5a528c | Florent Chuffart | anova_results = perform_anovas(replicates, aligned_inter_strain_nucs, plot_anova_boxes=plot_anova_boxes) |
2357 | 6b5a528c | Florent Chuffart | thres = FDR(anova_results[,"correl_st_ma_pvalue"],0.05) |
2358 | 6b5a528c | Florent Chuffart | if (is.na(thres)) { |
2359 | 6b5a528c | Florent Chuffart | # Boneferroni multiple test |
2360 | 6b5a528c | Florent Chuffart | thres = 0.05/length(anova_results[,1]) |
2361 | 6b5a528c | Florent Chuffart | } |
2362 | 6b5a528c | Florent Chuffart | filtred_anova_results = anova_results[anova_results[,"correl_st_ma_pvalue"]<thres,] |
2363 | 6b5a528c | Florent Chuffart | x11() |
2364 | 6b5a528c | Florent Chuffart | if (plot_anovas) { |
2365 | 6b5a528c | Florent Chuffart | plot(anova_results[,"mnase_st"],anova_results[,"correl_st_ma"], pch=1, xlim=c(-0.07,0.07), ylim=c(-0.07,0.07),main="SNEPs" ,xlab = "strain effect", ylab = "snep effect") |
2366 | 6b5a528c | Florent Chuffart | points(x=filtred_anova_results[,"mnase_st"],y=filtred_anova_results[,"correl_st_ma"],col=2, pch="+") |
2367 | 6b5a528c | Florent Chuffart | } |
2368 | 6b5a528c | Florent Chuffart | } |
2369 | 6b5a528c | Florent Chuffart | } |
2370 | 6b5a528c | Florent Chuffart | return(returned_list) |
2371 | 6b5a528c | Florent Chuffart | } |
2372 | 6b5a528c | Florent Chuffart | |
2373 | 6b5a528c | Florent Chuffart | |
2374 | 6b5a528c | Florent Chuffart |