|
1 |
\documentclass[a4paper]{article}
|
|
2 |
\usepackage{a4wide}
|
|
3 |
% \usepackage[top=2in, bottom=1.5in, left=1in, right=1in]{geometry}
|
|
4 |
\usepackage{subfigure}
|
|
5 |
\title{NucleoMiner 2.0: Detecting quantitative epigenomic variation at single-nucleosome resolution.} \author{Florent Chuffart}
|
|
6 |
\begin{document}
|
|
7 |
\maketitle
|
|
8 |
<<dir-and-date, echo = FALSE, eval=FALSE >>=
|
|
9 |
date();
|
|
10 |
R.Version()$version.string;
|
|
11 |
tmplyxdir <- getwd();
|
|
12 |
setwd(".")
|
|
13 |
getwd();
|
|
14 |
@
|
|
15 |
\tableofcontents{}
|
|
16 |
|
|
17 |
\section{Context}
|
|
18 |
|
|
19 |
<<dir-and-date, echo=FALSE, results=verbatim>>=
|
|
20 |
getwd()
|
|
21 |
date()
|
|
22 |
R.Version()$version.string
|
|
23 |
@
|
|
24 |
|
|
25 |
% \section{Workflow description}
|
|
26 |
%
|
|
27 |
% See Figure~\ref{fig:dataflow}.
|
|
28 |
% \begin{figure}
|
|
29 |
% \mbox{\includegraphics[trim = 0mm 0mm 0mm 0mm, width=.99\linewidth]{figs/NM2_fig1}}
|
|
30 |
% \caption{\label{fig:dataflow}Bowtie, TemplateFilter, intra-strain LOD of positions, inter-strain LOD of positions, Well-positioned, Fuzzy Regions, DESeq, FDR.}
|
|
31 |
% \end{figure}
|
|
32 |
|
|
33 |
\section{Dataset description}
|
|
34 |
|
|
35 |
<< load_config, echo=FALSE, results=hide , eval=TRUE >>=
|
|
36 |
library(rjson)
|
|
37 |
library(nucleominer)
|
|
38 |
|
|
39 |
# Read config file
|
|
40 |
json_conf_file = "nucleo_miner_config.json"
|
|
41 |
config = fromJSON(paste(readLines(json_conf_file), collapse=""))
|
|
42 |
|
|
43 |
# Read sample file
|
|
44 |
all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
|
|
45 |
|
|
46 |
# Remove samples that seems to be eronous for some manipulation efficency reasons e.g. PCR
|
|
47 |
all_samples = all_samples[!(all_samples$id %in% c(33,45,48,55)), ]
|
|
48 |
|
|
49 |
combis = list(c("BY", "RM"))
|
|
50 |
combi = combis[[1]]
|
|
51 |
strains = combis[[1]]
|
|
52 |
marker = "H3K14ac"
|
|
53 |
manips = c("Mnase_Seq", marker)
|
|
54 |
all_samples = all_samples [
|
|
55 |
(all_samples$marker == "Mnase_Seq" | all_samples$marker == marker)
|
|
56 |
& (all_samples$strain == "BY" | all_samples$strain == "RM"),]
|
|
57 |
@
|
|
58 |
|
|
59 |
{\footnotesize
|
|
60 |
<<echo=TRUE, eval=TRUE>>=
|
|
61 |
all_samples
|
|
62 |
@
|
|
63 |
}
|
|
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
\section{Genome alignments and fragmentation}
|
|
69 |
|
|
70 |
\subsection{Customize Function "translate"}
|
|
71 |
|
|
72 |
Customize function "translate" which allows to use the alignments when performing various tasks.
|
|
73 |
|
|
74 |
Several possibilities:
|
|
75 |
\begin{itemize}
|
|
76 |
\item All in same strain: regions are whole chromosomes and use the default translate function which is neutral.
|
|
77 |
\item Two or more strains: Edit a list of regions and customize the translate function which performs the correspondence between the different genomes. How we did it: Explain how to obtain .c2c with NM1.0, then use it to produce the list of regions and customize "translate".
|
|
78 |
\end{itemize}
|
|
79 |
|
|
80 |
\subsection{Computing Common Uninterrupted Regions (CUR) Between Strains}
|
|
81 |
|
|
82 |
|
|
83 |
<< compute_cur, echo=FALSE, eval=TRUE, results=hide>>=
|
|
84 |
cur_filename = paste(config$RESULTS_DIR, "/all_curs.tab", sep="")
|
|
85 |
if (!file.exists(config$RESULTS_DIR)) {
|
|
86 |
dir.create(file.path(".", config$RESULTS_DIR), recursive=TRUE)
|
|
87 |
}
|
|
88 |
if (!file.exists(cur_filename)) {
|
|
89 |
print("Computing and caching common uninterrupted regions...")
|
|
90 |
write.table(compute_inter_all_strain_curs(config=config), file=cur_filename, row.names=FALSE, quote=FALSE)
|
|
91 |
print("done.")
|
|
92 |
}
|
|
93 |
curs = get_content(cur_filename, "table", stringsAsFactors=FALSE, head=TRUE)
|
|
94 |
curs$chr = as.character(curs$chr)
|
|
95 |
curs = curs[curs$chr==1, ]
|
|
96 |
# Filtering CURs of the 1st chromosome
|
|
97 |
curs = curs[curs$chr==1, ]
|
|
98 |
cur_indexes = 1:length(curs[,1])
|
|
99 |
@
|
|
100 |
|
|
101 |
{\footnotesize
|
|
102 |
<<echo=TRUE, eval=TRUE>>=
|
|
103 |
head(curs)
|
|
104 |
@
|
|
105 |
}
|
|
106 |
|
|
107 |
\section{Inference of well-positioned nucleosomes in each strain}
|
|
108 |
|
|
109 |
\subsection{Run TemplateFilter on Mnase Samples}
|
|
110 |
|
|
111 |
For each sample we perform TemplateFilter analysis. TemplateFilter is launched for each entire chromosome (Not only CUR).
|
|
112 |
|
|
113 |
{\footnotesize
|
|
114 |
<<template_filter, echo=FALSE, eval=TRUE >>=
|
|
115 |
for (id in 1:6) {
|
|
116 |
cmd = paste("TemplateFilter data/TF/sample_", id, "_TF.txt data/Templates_7.1.txt -out_prefix data/TF/sample_", id, "_all_nucs -corr_bound 0.5 -min_width 100 -max_width 160 -overlap 3", sep="")
|
|
117 |
print(paste(cmd))
|
|
118 |
if (!file.exists(paste("data/TF/sample_", id, "_all_nucs.tab", sep=""))) {
|
|
119 |
system(cmd)
|
|
120 |
}
|
|
121 |
}
|
|
122 |
@
|
|
123 |
}
|
|
124 |
|
|
125 |
\subsection{Extracting Maps for Well Positionned and Fuzzy Nucleosomes}
|
|
126 |
|
|
127 |
\setkeys{Gin}{width=.99\linewidth}
|
|
128 |
\begin{figure}
|
|
129 |
\begin{center}
|
|
130 |
\subfigure{
|
|
131 |
<< intra_strain_alignment1, echo=FALSE, results=hide, fig=TRUE, height=5, width=16, eval=TRUE >>=
|
|
132 |
cur_index = 2
|
|
133 |
cur = as.list(curs[cur_index,])
|
|
134 |
cur$begin = 168850
|
|
135 |
cur$end = 170200
|
|
136 |
expes = list(c(1,2,3))
|
|
137 |
replicates = build_replicates(expes, cur, all_samples= all_samples, config=config)
|
|
138 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
139 |
plot_coverage = FALSE,
|
|
140 |
plot_squared_reads = TRUE,
|
|
141 |
plot_ref_genome = FALSE,
|
|
142 |
plot_arrow_raw_reads = FALSE,
|
|
143 |
plot_arrow_nuc_reads = FALSE,
|
|
144 |
plot_gaussian_reads = FALSE,
|
|
145 |
plot_gaussian_unified_reads = FALSE,
|
|
146 |
plot_ellipse_nucs = FALSE,
|
|
147 |
plot_wp_nucs = FALSE,
|
|
148 |
plot_fuzzy_nucs = FALSE,
|
|
149 |
plot_wp_nuc_model = FALSE,
|
|
150 |
plot_common_nucs = FALSE,
|
|
151 |
height = 300,
|
|
152 |
xlab = "",
|
|
153 |
config = config
|
|
154 |
)
|
|
155 |
@
|
|
156 |
}
|
|
157 |
\subfigure{
|
|
158 |
<< intra_strain_alignment2, echo=FALSE, results=hide, fig=TRUE, height=5, width=16, eval=TRUE >>=
|
|
159 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
160 |
plot_coverage = FALSE,
|
|
161 |
change_col = FALSE,
|
|
162 |
plot_squared_reads = TRUE,
|
|
163 |
plot_ref_genome = FALSE,
|
|
164 |
plot_arrow_raw_reads = FALSE,
|
|
165 |
plot_arrow_nuc_reads = FALSE,
|
|
166 |
plot_gaussian_reads = FALSE,
|
|
167 |
plot_gaussian_unified_reads = FALSE,
|
|
168 |
plot_ellipse_nucs = TRUE,
|
|
169 |
plot_wp_nucs = FALSE,
|
|
170 |
plot_fuzzy_nucs = FALSE,
|
|
171 |
plot_wp_nuc_model = FALSE,
|
|
172 |
plot_common_nucs = FALSE,
|
|
173 |
plot_chain = FALSE,
|
|
174 |
height = 300,
|
|
175 |
xlab = "",
|
|
176 |
config = config
|
|
177 |
)
|
|
178 |
@
|
|
179 |
}
|
|
180 |
\subfigure{
|
|
181 |
<< intra_strain_alignment3, echo=FALSE, results=hide, fig=TRUE, height=5, width=16, eval=TRUE >>=
|
|
182 |
config$TRACK_LOD_OFFSET = c(0,0,0,0,0,0,0,1,0,0,1,0,-1,0,0,-1,0,0,-1,0,0)
|
|
183 |
config$LEGEND_LOD_POS = c(2,2,2,2,2,2,2,4,2,2,4,3,2,4,4,2,4,2,2,2,4)
|
|
184 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
185 |
plot_coverage = FALSE,
|
|
186 |
change_col = FALSE,
|
|
187 |
plot_squared_reads = TRUE,
|
|
188 |
plot_ref_genome = FALSE,
|
|
189 |
plot_arrow_raw_reads = FALSE,
|
|
190 |
plot_arrow_nuc_reads = FALSE,
|
|
191 |
plot_gaussian_reads = FALSE,
|
|
192 |
plot_gaussian_unified_reads = TRUE,
|
|
193 |
plot_ellipse_nucs = FALSE,
|
|
194 |
plot_wp_nucs = FALSE,
|
|
195 |
plot_fuzzy_nucs = FALSE,
|
|
196 |
plot_wp_nuc_model = FALSE,
|
|
197 |
plot_common_nucs = FALSE,
|
|
198 |
plot_chain = TRUE,
|
|
199 |
height = 300,
|
|
200 |
xlab = "",
|
|
201 |
config = config
|
|
202 |
)
|
|
203 |
@
|
|
204 |
}
|
|
205 |
\subfigure{
|
|
206 |
<< intra_strain_alignment4, echo=FALSE, results=hide, fig=TRUE, height=5, width=16, eval=TRUE >>=
|
|
207 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
208 |
plot_coverage = FALSE,
|
|
209 |
change_col = FALSE,
|
|
210 |
plot_squared_reads = TRUE,
|
|
211 |
plot_ref_genome = FALSE,
|
|
212 |
plot_arrow_raw_reads = FALSE,
|
|
213 |
plot_arrow_nuc_reads = FALSE,
|
|
214 |
plot_gaussian_reads = FALSE,
|
|
215 |
plot_gaussian_unified_reads = FALSE,
|
|
216 |
plot_ellipse_nucs = TRUE,
|
|
217 |
plot_wp_nucs = TRUE,
|
|
218 |
plot_fuzzy_nucs = FALSE,
|
|
219 |
plot_wp_nuc_model = FALSE,
|
|
220 |
plot_common_nucs = FALSE,
|
|
221 |
height = 300,
|
|
222 |
xlab = "",
|
|
223 |
config = config
|
|
224 |
)
|
|
225 |
@
|
|
226 |
}
|
|
227 |
\subfigure{
|
|
228 |
<< intra_strain_alignment_fuzzy, echo=FALSE, results=hide, fig=TRUE, height=5, width=16, eval=TRUE >>=
|
|
229 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
230 |
plot_coverage = FALSE,
|
|
231 |
change_col = FALSE,
|
|
232 |
plot_squared_reads = TRUE,
|
|
233 |
plot_ref_genome = FALSE,
|
|
234 |
plot_arrow_raw_reads = FALSE,
|
|
235 |
plot_arrow_nuc_reads = FALSE,
|
|
236 |
plot_gaussian_reads = FALSE,
|
|
237 |
plot_gaussian_unified_reads = FALSE,
|
|
238 |
plot_ellipse_nucs = FALSE,
|
|
239 |
plot_wp_nucs = TRUE,
|
|
240 |
plot_fuzzy_nucs = TRUE,
|
|
241 |
plot_wp_nuc_model = FALSE,
|
|
242 |
plot_common_nucs = FALSE,
|
|
243 |
height = 300,
|
|
244 |
xlab = "",
|
|
245 |
config = config
|
|
246 |
)
|
|
247 |
@
|
|
248 |
}
|
|
249 |
\end{center}
|
|
250 |
\caption{Extracting Maps for Well Positionned and Fuzzy Nucleosomes}
|
|
251 |
\label{fig:inf}
|
|
252 |
\end{figure}
|
|
253 |
|
|
254 |
|
|
255 |
|
|
256 |
\section{Alignment of nucleosomes for each pair of strains}
|
|
257 |
|
|
258 |
\setkeys{Gin}{width=.99\linewidth}
|
|
259 |
\begin{figure}
|
|
260 |
\begin{center}
|
|
261 |
\subfigure{
|
|
262 |
<< inter_strain_alignment_wp, echo=FALSE, results=hide, fig=TRUE, height=6, width=16, eval=TRUE >>=
|
|
263 |
expes = list(c(1,2,3), c(4,5,6))
|
|
264 |
replicates = build_replicates(expes, cur, all_samples= all_samples, config=config)
|
|
265 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
266 |
plot_coverage = FALSE,
|
|
267 |
plot_squared_reads = TRUE,
|
|
268 |
plot_ref_genome = FALSE,
|
|
269 |
plot_arrow_raw_reads = FALSE,
|
|
270 |
plot_arrow_nuc_reads = FALSE,
|
|
271 |
plot_gaussian_reads = FALSE,
|
|
272 |
plot_gaussian_unified_reads = FALSE,
|
|
273 |
plot_ellipse_nucs = FALSE,
|
|
274 |
plot_wp_nucs = TRUE,
|
|
275 |
plot_wp_nuc_model = TRUE,
|
|
276 |
plot_common_nucs = TRUE,
|
|
277 |
height = 300,
|
|
278 |
config = config
|
|
279 |
)
|
|
280 |
@
|
|
281 |
}
|
|
282 |
\subfigure{
|
|
283 |
<< inter_strain_alignment_unrs, echo=FALSE, results=hide, fig=TRUE, height=6, width=16, eval=TRUE >>=
|
|
284 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
285 |
plot_coverage = FALSE,
|
|
286 |
plot_squared_reads = TRUE,
|
|
287 |
plot_ref_genome = FALSE,
|
|
288 |
plot_arrow_raw_reads = FALSE,
|
|
289 |
plot_arrow_nuc_reads = FALSE,
|
|
290 |
plot_gaussian_reads = FALSE,
|
|
291 |
plot_gaussian_unified_reads = FALSE,
|
|
292 |
plot_ellipse_nucs = FALSE,
|
|
293 |
plot_wp_nucs = TRUE,
|
|
294 |
plot_wp_nuc_model = FALSE,
|
|
295 |
plot_common_nucs = TRUE,
|
|
296 |
plot_common_unrs = TRUE,
|
|
297 |
height = 300,
|
|
298 |
config = config
|
|
299 |
)
|
|
300 |
@
|
|
301 |
}
|
|
302 |
\setkeys{Gin}{width=.49\linewidth}
|
|
303 |
\subfigure{
|
|
304 |
<< inter_strain_alignment_unrs_1, echo=FALSE, results=hide, fig=TRUE, height=6, width=8, eval=TRUE >>=
|
|
305 |
cur_index = 3
|
|
306 |
cur = as.list(curs[cur_index,])
|
|
307 |
cur$begin = 193500
|
|
308 |
cur$end = 194000
|
|
309 |
replicates = build_replicates(expes, cur, all_samples= all_samples, config=config)
|
|
310 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
311 |
plot_coverage = FALSE,
|
|
312 |
plot_squared_reads = TRUE,
|
|
313 |
plot_ref_genome = FALSE,
|
|
314 |
plot_arrow_raw_reads = FALSE,
|
|
315 |
plot_arrow_nuc_reads = FALSE,
|
|
316 |
plot_gaussian_reads = FALSE,
|
|
317 |
plot_gaussian_unified_reads = FALSE,
|
|
318 |
plot_ellipse_nucs = FALSE,
|
|
319 |
plot_wp_nucs = TRUE,
|
|
320 |
plot_wp_nuc_model = FALSE,
|
|
321 |
plot_common_nucs = TRUE,
|
|
322 |
plot_common_unrs = TRUE,
|
|
323 |
height = 300,
|
|
324 |
config = config
|
|
325 |
)
|
|
326 |
@
|
|
327 |
}%
|
|
328 |
\subfigure{
|
|
329 |
<< inter_strain_alignment_unrs_3, echo=FALSE, results=hide, fig=TRUE, height=6, width=8, eval=TRUE >>=
|
|
330 |
cur$begin = 196758
|
|
331 |
cur$end = 197425
|
|
332 |
replicates = build_replicates(expes, cur, all_samples= all_samples, config=config)
|
|
333 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
334 |
plot_coverage = FALSE,
|
|
335 |
plot_squared_reads = TRUE,
|
|
336 |
plot_ref_genome = FALSE,
|
|
337 |
plot_arrow_raw_reads = FALSE,
|
|
338 |
plot_arrow_nuc_reads = FALSE,
|
|
339 |
plot_gaussian_reads = FALSE,
|
|
340 |
plot_gaussian_unified_reads = FALSE,
|
|
341 |
plot_ellipse_nucs = FALSE,
|
|
342 |
plot_wp_nucs = TRUE,
|
|
343 |
plot_wp_nuc_model = FALSE,
|
|
344 |
plot_common_nucs = TRUE,
|
|
345 |
plot_common_unrs = TRUE,
|
|
346 |
height = 300,
|
|
347 |
config = config
|
|
348 |
)
|
|
349 |
@
|
|
350 |
}
|
|
351 |
\end{center}
|
|
352 |
\caption{Alignment of nucleosomes for each pair of strains}
|
|
353 |
\end{figure}
|
|
354 |
|
|
355 |
|
|
356 |
|
|
357 |
|
|
358 |
|
|
359 |
|
|
360 |
|
|
361 |
|
|
362 |
|
|
363 |
<< distrib_wp_size_precossesing, echo=FALSE, eval=TRUE >>=
|
|
364 |
# if (!file.exists(unr_map_filename) & !file.exists(wp_map_filename) & !file.exists(fuzzy_map_filename)) {
|
|
365 |
# source("extract_maps.R")
|
|
366 |
# }
|
|
367 |
|
|
368 |
BY_wp = read.table( paste(config$RESULTS_DIR, "/full/BY_wp.tab", sep=""), header=TRUE)
|
|
369 |
BY_wp = BY_wp[BY_wp$wp == 1,]
|
|
370 |
BY_wp$length = BY_wp$upper_bound - BY_wp$lower_bound
|
|
371 |
|
|
372 |
RM_wp = read.table(paste(config$RESULTS_DIR, "/full/RM_wp.tab", sep=""), header=TRUE)
|
|
373 |
RM_wp = RM_wp[RM_wp$wp == 1,]
|
|
374 |
RM_wp$length = RM_wp$upper_bound - RM_wp$lower_bound
|
|
375 |
|
|
376 |
BY_fuzzy = read.table(paste(config$RESULTS_DIR, "/full/BY_fuzzy.tab", sep=""), header=TRUE)
|
|
377 |
BY_fuzzy$length = BY_fuzzy$upper_bound - BY_fuzzy$lower_bound
|
|
378 |
BY_fuzzy = BY_fuzzy[BY_fuzzy$length >= 75, ]
|
|
379 |
|
|
380 |
RM_fuzzy = read.table(paste(config$RESULTS_DIR, "/full/RM_fuzzy.tab", sep=""), header=TRUE)
|
|
381 |
RM_fuzzy$length = RM_fuzzy$upper_bound - RM_fuzzy$lower_bound
|
|
382 |
RM_fuzzy = RM_fuzzy[RM_fuzzy$length >= 75, ]
|
|
383 |
|
|
384 |
BY_unr = read.table(paste(config$RESULTS_DIR, "/full/BY_in_BY_RM_unr.tab", sep=""), header=TRUE)
|
|
385 |
BY_unr$length = BY_unr$upper_bound - BY_unr$lower_bound
|
|
386 |
BY_unr = BY_unr[BY_unr$length >= 75, ]
|
|
387 |
|
|
388 |
common_wp_filename = paste(config$RESULTS_DIR, "/full/BY_RM_common_wp.tab", sep="")
|
|
389 |
common_wp = read.table(common_wp_filename, header=TRUE)
|
|
390 |
|
|
391 |
@
|
|
392 |
|
|
393 |
{\footnotesize
|
|
394 |
<<echo=TRUE, eval=TRUE>>=
|
|
395 |
head(BY_wp)
|
|
396 |
head(BY_fuzzy)
|
|
397 |
head(BY_unr)
|
|
398 |
@
|
|
399 |
}
|
|
400 |
|
|
401 |
\setkeys{Gin}{width=.33\linewidth}
|
|
402 |
\begin{figure}
|
|
403 |
\begin{center}
|
|
404 |
\subfigure{
|
|
405 |
<< distrib_wp_size, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
406 |
hist(BY_wp$length, nclass=90, main = paste("Well positioned in BY"), xlab = "size (bp)")
|
|
407 |
@
|
|
408 |
}%
|
|
409 |
\subfigure{
|
|
410 |
<< distrib_fuzzy_size, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
411 |
hist(BY_fuzzy$length, nclass=150, main = paste("Fuzzy in BY"), xlab = "size (bp)")
|
|
412 |
abline(v=1:10 * 150, lty=2, col="grey")
|
|
413 |
@
|
|
414 |
}%
|
|
415 |
\subfigure{
|
|
416 |
<< distrib_unr_size, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
417 |
hist(BY_unr$length, nclass=200, main = paste("UNR in BY-RM"), xlab = "size (bp)")
|
|
418 |
abline(v=1:30 * 150, lty=2, col="grey")
|
|
419 |
@
|
|
420 |
}
|
|
421 |
\end{center}
|
|
422 |
\caption{Distribution of nucleosome size}
|
|
423 |
\end{figure}
|
|
424 |
|
|
425 |
|
|
426 |
|
|
427 |
|
|
428 |
|
|
429 |
|
|
430 |
<< distrib_llr_tails, echo=FALSE, eval=TRUE >>=
|
|
431 |
by_llrs_filename = paste(config$RESULTS_DIR, "/full/BY_llrs.tab", sep="")
|
|
432 |
rm_llrs_filename = paste(config$RESULTS_DIR, "/full/RM_llrs.tab", sep="")
|
|
433 |
by_rm_llrs_filename = paste(config$RESULTS_DIR, "/full/BY_RM_llrs.tab", sep="")
|
|
434 |
|
|
435 |
BY_llrs = read.table(by_llrs_filename, header=TRUE)
|
|
436 |
RM_llrs = read.table(rm_llrs_filename, header=TRUE)
|
|
437 |
BY_RM_llrs = read.table(by_rm_llrs_filename, header=TRUE)
|
|
438 |
@
|
|
439 |
|
|
440 |
|
|
441 |
|
|
442 |
\setkeys{Gin}{width=.33\linewidth}
|
|
443 |
\begin{figure}
|
|
444 |
\begin{center}
|
|
445 |
\subfigure{
|
|
446 |
<< distrib_llr1_by, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
447 |
hist(BY_llrs[[1]], nclass=200, main="BY LLR1", xlim=c(0,500))
|
|
448 |
abline(v=20, lty=2, col="grey")
|
|
449 |
@
|
|
450 |
}%
|
|
451 |
\subfigure{
|
|
452 |
<< distrib_llr1_rm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
453 |
hist(RM_llrs[[1]], nclass=200, main="RM LLR1", xlim=c(0,500))
|
|
454 |
abline(v=20, lty=2, col="grey")
|
|
455 |
@
|
|
456 |
}%
|
|
457 |
\subfigure{
|
|
458 |
<< distrib_all_llr3_byrm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
459 |
hist(BY_RM_llrs[[1]], nclass=200, main="All BY RM LLR3", xlim=c(0,1500))
|
|
460 |
abline(v=100, lty=2, col="grey")
|
|
461 |
@
|
|
462 |
}%
|
|
463 |
|
|
464 |
\subfigure{
|
|
465 |
<< distrib_llr2_by, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
466 |
hist(BY_wp$wp_llr,nclass=100, xlab = "BY LLR3", main = "BY replicates")
|
|
467 |
@
|
|
468 |
}%
|
|
469 |
\subfigure{
|
|
470 |
<< distrib_llr2_rm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
471 |
hist(RM_wp$wp_llr,nclass=100, xlab = "RM LLR2", main = "RM replicates")
|
|
472 |
@
|
|
473 |
}%
|
|
474 |
\subfigure{
|
|
475 |
<< distrib_filtred_llr3_byrm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
476 |
hist(common_wp$llr_score,nclass=100, xlab = "BY RM LLR3 for paired wp", main = "BY-RM comparisons")
|
|
477 |
@
|
|
478 |
}%
|
|
479 |
|
|
480 |
\end{center}
|
|
481 |
\caption{Distribution of LLRs}
|
|
482 |
\end{figure}
|
|
483 |
|
|
484 |
|
|
485 |
\setkeys{Gin}{width=.33\linewidth}
|
|
486 |
|
|
487 |
\begin{figure}
|
|
488 |
\begin{center}
|
|
489 |
\subfigure{
|
|
490 |
<< distrib_llrpval_by, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
491 |
by_pv = BY_wp$wp_pval
|
|
492 |
hist(-log10(by_pv),nclass=100, xlab = "-log10(pvalue)", main = "BY replicates")
|
|
493 |
thres_pval = FDR(by_pv, 0.005)
|
|
494 |
abline(v=-log10(thres_pval), lty=2, col="grey")
|
|
495 |
@
|
|
496 |
}%
|
|
497 |
\subfigure{
|
|
498 |
<< distrib_llrpval_rm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
499 |
rm_pv = RM_wp$wp_pval
|
|
500 |
hist(-log10(rm_pv),nclass=100, xlab = "-log10(pvalue)", main = "RM replicates")
|
|
501 |
thres_pval = FDR(rm_pv, 0.005)
|
|
502 |
abline(v=-log10(thres_pval), lty=2, col="grey")
|
|
503 |
@
|
|
504 |
}%
|
|
505 |
\subfigure{
|
|
506 |
<< distrib_llrpval_byrm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
507 |
cpv = common_wp$common_wp_pval
|
|
508 |
hist(-log10(cpv),nclass=100, xlab = "-log10(pvalue)", main = "BY-RM comparisons")
|
|
509 |
thres_pval = FDR(cpv, 0.005)
|
|
510 |
abline(v=-log10(thres_pval), lty=2, col="grey")
|
|
511 |
@
|
|
512 |
}%
|
|
513 |
\end{center}
|
|
514 |
\caption{Distribution of LLR2 and LLR3 p-values}
|
|
515 |
\end{figure}
|
|
516 |
|
|
517 |
|
|
518 |
|
|
519 |
|
|
520 |
{\footnotesize
|
|
521 |
<< stat_about, echo=TRUE , eval=TRUE >>=
|
|
522 |
by_pv = na.omit(BY_wp$wp_pval)
|
|
523 |
length(by_pv)
|
|
524 |
print(sum(by_pv > FDR(by_pv, 0.005)))
|
|
525 |
|
|
526 |
rm_pv = na.omit(RM_wp$wp_pval)
|
|
527 |
length(rm_pv)
|
|
528 |
print(sum(rm_pv > FDR(rm_pv, 0.005)))
|
|
529 |
|
|
530 |
dim(common_wp)
|
|
531 |
cpv = common_wp$common_wp_pval
|
|
532 |
thres_pval = FDR(cpv, 0.005)
|
|
533 |
sum(cpv < thres_pval)
|
|
534 |
length(cpv)
|
|
535 |
thres_pval
|
|
536 |
sum(cpv < thres_pval)
|
|
537 |
sum(cpv > 0.05)
|
|
538 |
sum(cpv > 0.01)
|
|
539 |
@
|
|
540 |
}
|
|
541 |
|
|
542 |
<< stat_about2, echo=FALSE, eval=FALSE >>=
|
|
543 |
foo = density(BY_wp$length)
|
|
544 |
print(foo$x[which(foo$y == max(foo$y))])
|
|
545 |
|
|
546 |
foo = density(RM_wp$length)
|
|
547 |
print(foo$x[which(foo$y == max(foo$y))])
|
|
548 |
|
|
549 |
print(sum(BY_fuzzy$length <= 200)/nrow(BY_fuzzy))
|
|
550 |
|
|
551 |
print(sum(RM_fuzzy$length <= 200)/nrow(RM_fuzzy))
|
|
552 |
|
|
553 |
print(sum(BY_fuzzy$length > 400)/nrow(BY_fuzzy))
|
|
554 |
|
|
555 |
print(sum(RM_fuzzy$length > 400)/nrow(RM_fuzzy))
|
|
556 |
|
|
557 |
print(sum(BY_unr$length > 600))
|
|
558 |
|
|
559 |
print(sum(BY_unr$length <= 200)/nrow(BY_unr))
|
|
560 |
|
|
561 |
print(sum(wpunr_mnase$mnase_l2fc_pval < FDR(wpunr_mnase$mnase_l2fc_pval , 0.05)))
|
|
562 |
@
|
|
563 |
|
|
564 |
|
|
565 |
|
|
566 |
|
|
567 |
|
|
568 |
|
|
569 |
\section{About Dyads Shifts}
|
|
570 |
|
|
571 |
<< dyad_shift, echo=FALSE, results=hide , eval=TRUE >>=
|
|
572 |
common_wp_with_dyad_shift_filename = "cache/common_wp_with_dyad_shift.tab"
|
|
573 |
if (!file.exists(common_wp_with_dyad_shift_filename)) {
|
|
574 |
tmp_trs_filename = paste(config$RESULTS_DIR, "/full/RM_wp_tr_2_BY.tab", sep="")
|
|
575 |
trs_RM_to_BY = read.table(tmp_trs_filename, header=TRUE, stringsAsFactors=FALSE)
|
|
576 |
diffs = apply(t(1:length(common_wp[,1])), 2, function(i) {
|
|
577 |
if (i %% 100 == 1) {print(paste(i, "/", length(common_wp[,1])))}
|
|
578 |
tmp_common_wp = common_wp[i,]
|
|
579 |
tmp_BY_wp = BY_wp[BY_wp$cur_index == tmp_common_wp$cur_index & BY_wp$index_nuc == tmp_common_wp[[paste("index_nuc_", "BY", sep="")]],]
|
|
580 |
tmp_RM_wp = RM_wp[RM_wp$cur_index == tmp_common_wp$cur_index & RM_wp$index_nuc == tmp_common_wp[[paste("index_nuc_", "RM", sep="")]],]
|
|
581 |
tr = trs_RM_to_BY[trs_RM_to_BY$cur_index == tmp_RM_wp$cur_index & trs_RM_to_BY$index_nuc == tmp_RM_wp$index_nuc, ]
|
|
582 |
dyad_rm = (tr$end + tr$begin) / 2
|
|
583 |
dyad_by = (tmp_BY_wp$upper_bound + tmp_BY_wp$lower_bound) / 2
|
|
584 |
diff = abs(dyad_by - dyad_rm)
|
|
585 |
tr$diff = diff
|
|
586 |
return(tr)
|
|
587 |
})
|
|
588 |
foo = do.call(rbind, diffs)
|
|
589 |
common_wp_with_dyad_shift = common_wp
|
|
590 |
common_wp_with_dyad_shift$dyad_shift = unlist(foo$diff)
|
|
591 |
save(common_wp_with_dyad_shift, diffs, file=common_wp_with_dyad_shift_filename)
|
|
592 |
}
|
|
593 |
load(common_wp_with_dyad_shift_filename)
|
|
594 |
common_wp = common_wp_with_dyad_shift
|
|
595 |
# layout(1:3)
|
|
596 |
# hist(common_wp_with_dyad_shift$dyad_shift, nclass=100)
|
|
597 |
# hist(common_wp_with_dyad_shift$dyad_shift[cpv < thres_pval], nclass=100)
|
|
598 |
# hist(common_wp_with_dyad_shift$dyad_shift[cpv >= thres_pval], nclass=50)
|
|
599 |
@
|
|
600 |
|
|
601 |
{ \footnotesize
|
|
602 |
<< >>=
|
|
603 |
by_pv = BY_wp$wp_pval
|
|
604 |
thres_pval = FDR(by_pv, 0.005)
|
|
605 |
ds = BY_wp[BY_wp$wp_pval < thres_pval, ]$dyad_shift
|
|
606 |
quantile(ds, probs=((1:20)/20))
|
|
607 |
|
|
608 |
rm_pv = RM_wp$wp_pval
|
|
609 |
thres_pval = FDR(rm_pv, 0.005)
|
|
610 |
ds = RM_wp[RM_wp$wp_pval < thres_pval, ]$dyad_shift
|
|
611 |
quantile(ds, probs=((1:20)/20))
|
|
612 |
|
|
613 |
cpv = common_wp$common_wp_pval
|
|
614 |
thres_pval = FDR(cpv, 0.005)
|
|
615 |
quantile(common_wp$dyad_shift, probs=((1:20)/20))
|
|
616 |
quantile(common_wp$dyad_shift[cpv < thres_pval], probs=((1:20)/20))
|
|
617 |
quantile(common_wp$dyad_shift[cpv >= thres_pval], probs=((1:20)/20))
|
|
618 |
@
|
|
619 |
}
|
|
620 |
|
|
621 |
|
|
622 |
\setkeys{Gin}{width=.33\linewidth}
|
|
623 |
|
|
624 |
\begin{figure}
|
|
625 |
\begin{center}
|
|
626 |
\subfigure{
|
|
627 |
<< distrib_dyad_shift_by, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
628 |
hist(ds, nclass=100, xlab = "dyad shift", main = "BY replicates")
|
|
629 |
@
|
|
630 |
}%
|
|
631 |
\subfigure{
|
|
632 |
<< distrib_dyad_shift_rm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
633 |
hist(RM_wp$dyad_shift, nclass=100, xlab = "dyad_shift", main = "RM replicates")
|
|
634 |
@
|
|
635 |
}%
|
|
636 |
\subfigure{
|
|
637 |
<< distrib_dyad_shift_byrm, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
638 |
hist(common_wp_with_dyad_shift$dyad_shift, nclass=100, main="BY-RM common wp")
|
|
639 |
@
|
|
640 |
}%
|
|
641 |
\end{center}
|
|
642 |
\caption{Distribution of dyad shifts}
|
|
643 |
\end{figure}
|
|
644 |
|
|
645 |
|
|
646 |
% \section{Counting coverage for each nucleosome}
|
|
647 |
%
|
|
648 |
% << count_reads, echo=FALSE, eval=TRUE>>=
|
|
649 |
% wp_count_filename = paste(config$RESULTS_DIR, "/full/",combi[1],"_",combi[2],"_",marker,"_wp_and_nbreads.tab",sep="")
|
|
650 |
% unr_count_filename = paste(config$RESULTS_DIR, "/full/",combi[1],"_",combi[2],"_",marker,"_unr_and_nbreads.tab",sep="")
|
|
651 |
% if (!file.exists(wp_count_filename) | !file.exists(unr_count_filename)) {
|
|
652 |
% source("count_reads.R")
|
|
653 |
% }
|
|
654 |
% wp_count = read.table(wp_count_filename, header=TRUE)
|
|
655 |
% unr_count = read.table(unr_count_filename, header=TRUE)
|
|
656 |
% @
|
|
657 |
% << >>=
|
|
658 |
% head(wp_count)
|
|
659 |
% head(unr_count)
|
|
660 |
% @
|
|
661 |
|
|
662 |
|
|
663 |
\section{Statistical analysis of quantitative variation}
|
|
664 |
|
|
665 |
|
|
666 |
{\footnotesize
|
|
667 |
<<>>=
|
|
668 |
wpunr_snep = read.table(paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_",marker,"_wp_snep.tab",sep=""), header=TRUE)
|
|
669 |
head(wpunr_snep)
|
|
670 |
|
|
671 |
check_iso_snep = wpunr_snep = read.table(paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_",marker,"_wpunr_snep.tab",sep=""), header=TRUE)
|
|
672 |
check_iso_snep = check_iso_snep[order(check_iso_snep$chr_BY, check_iso_snep$lower_bound_BY),]
|
|
673 |
# isolated, 943 iso wp and unr over 5240 for Fabien
|
|
674 |
nb_iso = 0
|
|
675 |
for (snep_index in which(check_iso_snep$snep_index)) {
|
|
676 |
cur_snep = check_iso_snep[snep_index, ]
|
|
677 |
prev_snep = check_iso_snep[snep_index-1,]
|
|
678 |
next_snep = check_iso_snep[snep_index+1,]
|
|
679 |
print(snep_index - 1)
|
|
680 |
|
|
681 |
if (nrow(prev_snep) == 0) {
|
|
682 |
is_prev_not_snep = TRUE
|
|
683 |
} else if (prev_snep$pvalsGLM >= 0.01 | prev_snep$cur_index != cur_snep$cur_index
|
|
684 |
) { # it's NOT a SNEP
|
|
685 |
is_prev_not_snep = TRUE
|
|
686 |
} else {
|
|
687 |
is_prev_not_snep = FALSE
|
|
688 |
}
|
|
689 |
|
|
690 |
if (nrow(next_snep) == 0) {
|
|
691 |
is_next_not_snep = TRUE
|
|
692 |
} else if (next_snep$pvalsGLM >= 0.01 | next_snep$cur_index != cur_snep$cur_index
|
|
693 |
) { # it's NOT a SNEP
|
|
694 |
is_next_not_snep = TRUE
|
|
695 |
} else {
|
|
696 |
is_next_not_snep = FALSE
|
|
697 |
}
|
|
698 |
|
|
699 |
if (is_prev_not_snep & is_next_not_snep) {
|
|
700 |
nb_iso = nb_iso + 1
|
|
701 |
}
|
|
702 |
}
|
|
703 |
|
|
704 |
print(nb_iso)
|
|
705 |
print(nb_iso/sum(check_iso_snep$snep_index))
|
|
706 |
|
|
707 |
|
|
708 |
wpunr_mnase = read.table( paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_wp_mnase.tab",sep=""), header=TRUE)
|
|
709 |
head(wpunr_mnase)
|
|
710 |
|
|
711 |
sf = read.table(paste(config$RESULTS_DIR,"/full/size_factors.tab",sep=""), header=TRUE)
|
|
712 |
head(sf)
|
|
713 |
@
|
|
714 |
}
|
|
715 |
|
|
716 |
<<launch_deseq, echo=FALSE, results=hide, eval=TRUE >>=
|
|
717 |
top_left_index = which(-log10(wpunr_mnase$mnase_l2fc_pval) < 1 & -log10(wpunr_snep$pvalsGLM) > 12)[1]
|
|
718 |
top_right_index = which(-log10(wpunr_mnase$mnase_l2fc_pval) > 10 & -log10(wpunr_snep$pvalsGLM) > 10)[1]
|
|
719 |
bottom_right_index = which(-log10(wpunr_mnase$mnase_l2fc_pval) > 10 & -log10(wpunr_snep$pvalsGLM) < 2)[1]
|
|
720 |
|
|
721 |
curs = get_content(cur_filename, "table", stringsAsFactors=FALSE, head=TRUE)
|
|
722 |
the_nuc = wpunr_snep[top_left_index,]
|
|
723 |
cur_index = the_nuc$cur_index
|
|
724 |
cur = as.list(curs[cur_index,])
|
|
725 |
cur$begin = the_nuc$lower_bound_BY - 500
|
|
726 |
cur$end = the_nuc$upper_bound_BY + 1500
|
|
727 |
expes = list()
|
|
728 |
for (manip in manips) {
|
|
729 |
for (strain in strains) {
|
|
730 |
expes[[length(expes) + 1]] = all_samples[all_samples$strain == strain & all_samples$marker == manip, ]$id
|
|
731 |
}
|
|
732 |
}
|
|
733 |
sf = sf[match(unlist(expes), sf$sample_id),]
|
|
734 |
replicates = build_replicates(expes, cur, all_samples= all_samples, config=config)
|
|
735 |
out = watch_samples(replicates, config$READ_LENGTH,
|
|
736 |
plot_coverage = TRUE,
|
|
737 |
plot_squared_reads = FALSE,
|
|
738 |
plot_ref_genome = FALSE,
|
|
739 |
plot_arrow_raw_reads = FALSE,
|
|
740 |
plot_arrow_nuc_reads = FALSE,
|
|
741 |
plot_gaussian_reads = FALSE,
|
|
742 |
plot_gaussian_unified_reads = FALSE,
|
|
743 |
plot_ellipse_nucs = FALSE,
|
|
744 |
plot_wp_nucs = TRUE,
|
|
745 |
plot_wp_nuc_model = FALSE,
|
|
746 |
plot_common_nucs = TRUE,
|
|
747 |
plot_common_unrs = TRUE,
|
|
748 |
height = 300,
|
|
749 |
config = config
|
|
750 |
)
|
|
751 |
@
|
|
752 |
|
|
753 |
\setkeys{Gin}{width=.99\linewidth}
|
|
754 |
\begin{figure}
|
|
755 |
\begin{center}
|
|
756 |
\subfigure{
|
|
757 |
<< only_nuc_4_cov, echo=FALSE, results=hide, fig=TRUE, width=8, height=2, echo=FALSE, eval=TRUE >>=
|
|
758 |
layout(matrix(c(2,1,1,2),1 , byrow=TRUE))
|
|
759 |
expes2 = list()
|
|
760 |
for (manip in c("Mnase_Seq")) {
|
|
761 |
for (strain in strains) {
|
|
762 |
expes2[[length(expes2) + 1]] = all_samples[all_samples$strain == strain & all_samples$marker == manip, ]$id
|
|
763 |
}
|
|
764 |
}
|
|
765 |
replicates = build_replicates(expes2, cur, all_samples= all_samples, config=config)
|
|
766 |
bar = watch_samples(replicates, config$READ_LENGTH,
|
|
767 |
plot_coverage = FALSE,
|
|
768 |
plot_squared_reads = FALSE,
|
|
769 |
plot_ref_genome = FALSE,
|
|
770 |
plot_arrow_raw_reads = FALSE,
|
|
771 |
plot_arrow_nuc_reads = FALSE,
|
|
772 |
plot_gaussian_reads = FALSE,
|
|
773 |
plot_gaussian_unified_reads = FALSE,
|
|
774 |
plot_ellipse_nucs = FALSE,
|
|
775 |
plot_wp_nucs = TRUE,
|
|
776 |
plot_wp_nuc_model = FALSE,
|
|
777 |
plot_common_nucs = TRUE,
|
|
778 |
plot_common_unrs = TRUE,
|
|
779 |
height = 300,
|
|
780 |
config = config
|
|
781 |
)
|
|
782 |
@
|
|
783 |
}
|
|
784 |
\subfigure{
|
|
785 |
<< splitted_fig, fig=TRUE, width=8, height=7, results=hide, echo=FALSE, eval=TRUE >>=
|
|
786 |
layout(matrix(c(1,5,5,2,4,4,4,3),2, byrow=TRUE))
|
|
787 |
|
|
788 |
data = data.frame(nb_reads = unlist(wpunr_snep[top_left_index,11:21])/sf$all_1,
|
|
789 |
strain = c("BY", "BY", "BY", "RM", "RM", "RM", "BY", "BY", "BY", "RM", "RM"),
|
|
790 |
manip = c("Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", marker, marker, marker, marker, marker))
|
|
791 |
boxplot(nb_reads ~ strain + manip, data, las=3)
|
|
792 |
|
|
793 |
data = data.frame(nb_reads = unlist(wpunr_snep[top_right_index,11:21])/sf$all_1,
|
|
794 |
strain = c("BY", "BY", "BY", "RM", "RM", "RM", "BY", "BY", "BY", "RM", "RM"),
|
|
795 |
manip = c("Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", marker, marker, marker, marker, marker))
|
|
796 |
boxplot(nb_reads ~ strain + manip, data, las=3)
|
|
797 |
|
|
798 |
data = data.frame(nb_reads = unlist(wpunr_snep[bottom_right_index,11:21])/sf$all_1,
|
|
799 |
strain = c("BY", "BY", "BY", "RM", "RM", "RM", "BY", "BY", "BY", "RM", "RM"),
|
|
800 |
manip = c("Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", "Mnase_Seq", marker, marker, marker, marker, marker))
|
|
801 |
boxplot(nb_reads ~ strain + manip, data, las=3)
|
|
802 |
|
|
803 |
plot(-log10(wpunr_mnase$mnase_l2fc_pval), -log10(wpunr_snep$pvalsGLM), col=wpunr_snep$snep_index + 1, xlab = "-log10(pvalue MNase)", ylab = "-log10(pvalue SNEP)", main = "", pch=".")
|
|
804 |
points(-log10(wpunr_mnase$mnase_l2fc_pval)[top_left_index], -log10(wpunr_snep$pvalsGLM)[top_left_index], col=wpunr_snep$snep_index[top_left_index] + 1, pch = 21, bg="grey")
|
|
805 |
points(-log10(wpunr_mnase$mnase_l2fc_pval)[bottom_right_index], -log10(wpunr_snep$pvalsGLM)[bottom_right_index], col=wpunr_snep$snep_index[bottom_right_index] + 1, pch = 21, bg="grey")
|
|
806 |
points(-log10(wpunr_mnase$mnase_l2fc_pval)[top_right_index], -log10(wpunr_snep$pvalsGLM)[top_right_index], col=wpunr_snep$snep_index[top_right_index] + 1, pch = 21, bg="grey")
|
|
807 |
|
|
808 |
|
|
809 |
plot(0,0, col=0, xlim=c(cur$begin, cur$end), ylim=c(-1000, 4500), xlab="chr I", ylab="Normalised coverage")
|
|
810 |
lty_manip = list(Mnase_Seq = 1)
|
|
811 |
lty_manip[[marker]] = 2
|
|
812 |
col_strain = list()
|
|
813 |
col_strain[[combi[[1]]]] = 2
|
|
814 |
col_strain[[combi[[2]]]] = 4
|
|
815 |
for (cov_name in names(out)) {
|
|
816 |
sample_id = as.numeric(substr(cov_name, 5, 10))
|
|
817 |
strain = all_samples[all_samples$id == sample_id,]$strain
|
|
818 |
manip = all_samples[all_samples$id == sample_id,]$marker
|
|
819 |
lines(cur$begin:cur$end, out[[cov_name]](cur$begin:cur$end), pch="", col=col_strain[[strain]], lty=lty_manip[[manip]],lw=2)
|
|
820 |
}
|
|
821 |
legend("topright", col=c(2, 4, 2, 4), c("BY Mnas Seq", "RM Mnas Seq", paste("BY", marker), paste("RM", marker)), lty=c(1, 1, 2, 2), lw=2)
|
|
822 |
@
|
|
823 |
}
|
|
824 |
\end{center}
|
|
825 |
\caption{MNase effect Vs. SNEP effect}
|
|
826 |
\end{figure}
|
|
827 |
|
|
828 |
|
|
829 |
|
|
830 |
|
|
831 |
\section{Consistency with Nagarajan PLoS Genetics 2010}
|
|
832 |
|
|
833 |
\subsection{Counting common ff Naga nucleosomes}
|
|
834 |
|
|
835 |
\setkeys{Gin}{width=.99\linewidth}
|
|
836 |
\begin{figure}
|
|
837 |
\begin{center}
|
|
838 |
<<ff_vs_naga_consistency_wp, fig=TRUE, results=hide, echo=FALSE, eval=TRUE >>=
|
|
839 |
print("Vs. Nagarajan for wp...")
|
|
840 |
|
|
841 |
ff_wp_snep = get_content(paste("results/full/BY_RM_H3K14ac_wp_snep.tab",sep=""), "table", header=TRUE,stringsAsFactors=FALSE);
|
|
842 |
ff_wp_snep$ff_center = (ff_wp_snep[,2] + ff_wp_snep[,3])/2;
|
|
843 |
ff_wp_snep$index_ff = 1:length(ff_wp_snep$chr_BY);
|
|
844 |
|
|
845 |
naga_orig_snep = get_content(paste("results/naga/Nagarajan_H3K14ac_BY_RM_NManova_nuc.txt",sep =""), "table", stringsAsFactors=FALSE);
|
|
846 |
naga_orig_snep$index_naga = 1:length(naga_orig_snep[,1])
|
|
847 |
naga_orig_snep$nuc_center = (naga_orig_snep[,5]+naga_orig_snep[,6])/2;
|
|
848 |
naga_orig_snep$chr_arab = apply(t(naga_orig_snep[,1]),2,function(x){ROM2ARAB()[[substr(as.character(x), 4,1000)]]});
|
|
849 |
|
|
850 |
ff_naga_common_wp_filename = "cache/ff_naga_common_wp.tab"
|
|
851 |
|
|
852 |
if (!file.exists(ff_naga_common_wp_filename)) {
|
|
853 |
naga_chr_BY = list()
|
|
854 |
for (chr_BY in unique(ff_wp_snep$chr_BY)) {
|
|
855 |
naga_chr_BY[[paste("chr_BY",chr_BY, sep = "_")]] = naga_orig_snep[naga_orig_snep$chr_arab == chr_BY,]
|
|
856 |
}
|
|
857 |
foo = apply(t(1:length(ff_wp_snep[,1])), 2, function(nuc_ff_index) {;
|
|
858 |
if (nuc_ff_index %% 100 == 1) {print(paste(nuc_ff_index, "/", length(ff_wp_snep[,1])))}
|
|
859 |
nuc_ff = ff_wp_snep[nuc_ff_index,]
|
|
860 |
tmp_naga = naga_chr_BY[[paste("chr_BY", nuc_ff$chr_BY, sep = "_")]]
|
|
861 |
tmp_ff_center = rep(nuc_ff$ff_center, length(tmp_naga[,1]))
|
|
862 |
tmp_naga$diff = abs(tmp_naga$nuc_center - tmp_ff_center)
|
|
863 |
index_nearest = order(tmp_naga$diff)[1]
|
|
864 |
res = list(index_naga = tmp_naga[index_nearest,]$index_naga, index_ff = nuc_ff$index_ff, diff = tmp_naga[index_nearest,]$diff);
|
|
865 |
return(res);
|
|
866 |
});
|
|
867 |
tmp_joint = do.call(rbind, foo)
|
|
868 |
tmp_joint = data.frame(lapply(data.frame(tmp_joint, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE)
|
|
869 |
|
|
870 |
naga_indexes = unique(tmp_joint$index_naga)
|
|
871 |
dup_naga_indexes = unique(tmp_joint[duplicated(tmp_joint$index_naga),]$index_naga)
|
|
872 |
unique_naga_indexes = naga_indexes[!(naga_indexes %in% dup_naga_indexes)]
|
|
873 |
uniq_joint_1 = tmp_joint[tmp_joint$index_naga %in% unique_naga_indexes,]
|
|
874 |
|
|
875 |
uniq_joint_2 = tmp_joint[tmp_joint$index_naga %in% dup_naga_indexes,]
|
|
876 |
|
|
877 |
bar = apply(t(dup_naga_indexes), 2, function(naga_nuc){
|
|
878 |
i = which(naga_nuc == dup_naga_indexes)
|
|
879 |
if (i %% 100 == 1) {print(paste(i, "/", length(dup_naga_indexes)))}
|
|
880 |
tmp_tmp_joint = uniq_joint_2[uniq_joint_2$index_naga == naga_nuc,]
|
|
881 |
ret = tmp_tmp_joint[tmp_tmp_joint$diff == min(tmp_tmp_joint$diff),][1,]
|
|
882 |
return(ret)
|
|
883 |
})
|
|
884 |
uniq_joint_2 = do.call(rbind, bar)
|
|
885 |
uniq_joint_2 = data.frame(lapply(data.frame(uniq_joint_2, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE)
|
|
886 |
|
|
887 |
ff_naga_common_wp = rbind(uniq_joint_1, uniq_joint_2)
|
|
888 |
ff_naga_common_wp = ff_naga_common_wp[order(ff_naga_common_wp$index_ff),]
|
|
889 |
|
|
890 |
write.table(ff_naga_common_wp, ff_naga_common_wp_filename, sep = "\t",row.names = FALSE, quote=FALSE);
|
|
891 |
}
|
|
892 |
|
|
893 |
ff_naga_common_wp = get_content(ff_naga_common_wp_filename, "table", header=TRUE);
|
|
894 |
|
|
895 |
hist(ff_naga_common_wp$diff, nclass=1000,xlab = "distance (bp)", main = "ff naga commom wp distance")
|
|
896 |
|
|
897 |
@
|
|
898 |
\end{center}
|
|
899 |
\caption{...}
|
|
900 |
\end{figure}
|
|
901 |
|
|
902 |
|
|
903 |
\setkeys{Gin}{width=.99\linewidth}
|
|
904 |
\begin{figure}
|
|
905 |
\begin{center}
|
|
906 |
|
|
907 |
<<ff_vs_naga_consistency_thres, fig=TRUE, results=hide, echo=FALSE, eval=TRUE >>=
|
|
908 |
diad_diff_thres = 75
|
|
909 |
ff_naga_common_wp = ff_naga_common_wp[ff_naga_common_wp$diff <= diad_diff_thres,]
|
|
910 |
nb_ff_naga_common_wp = nrow(ff_naga_common_wp)
|
|
911 |
print (paste(nb_ff_naga_common_wp,"common wp between ff and naga with a threshold of", diad_diff_thres))
|
|
912 |
|
|
913 |
fdr = 0.0001
|
|
914 |
|
|
915 |
ff_naga_common_wp$naga_pval = naga_orig_snep[ff_naga_common_wp$index_naga,]$V15
|
|
916 |
ff_naga_common_wp$ff_pval = ff_wp_snep[ff_naga_common_wp$index_ff,]$pvalsGLM
|
|
917 |
ff_naga_common_wp$naga_seffect = naga_orig_snep[ff_naga_common_wp$index_naga,]$V14
|
|
918 |
ff_naga_common_wp$ff_seffect = ff_wp_snep[ff_naga_common_wp$index_ff,]$manipMnase_Seq.strainRM
|
|
919 |
naga_wp_snep_pval_thres = FDR(ff_naga_common_wp$naga_pval, fdr)
|
|
920 |
ff_wp_snep_pval_thres = FDR(ff_naga_common_wp$ff_pval, fdr)
|
|
921 |
ff_naga_common_wp$naga_snep = ff_naga_common_wp$naga_pval < naga_wp_snep_pval_thres
|
|
922 |
ff_naga_common_wp$ff_snep = ff_naga_common_wp$ff_pval < ff_wp_snep_pval_thres
|
|
923 |
|
|
924 |
ff_naga_common_wp_snep = ff_naga_common_wp[ff_naga_common_wp$ff_snep & ff_naga_common_wp$naga_snep,]
|
|
925 |
nb_ff_naga_common_wp_snep = nrow(ff_naga_common_wp_snep)
|
|
926 |
naga_wp_snep = ff_naga_common_wp[ff_naga_common_wp$naga_snep, ]
|
|
927 |
ff_wp_snep = ff_naga_common_wp[ff_naga_common_wp$ff_snep,]
|
|
928 |
nb_naga_wp_snep = nrow(naga_wp_snep)
|
|
929 |
nb_ff_wp_snep = nrow(ff_wp_snep)
|
|
930 |
|
|
931 |
hist(ff_naga_common_wp$diff, nclass=70, xlab = "distance (bp)", main = "distance between ff naga common wp")
|
|
932 |
@
|
|
933 |
\end{center}
|
|
934 |
\caption{FF vs. Naga nucleosome positions}
|
|
935 |
\end{figure}
|
|
936 |
|
|
937 |
|
|
938 |
|
|
939 |
|
|
940 |
|
|
941 |
|
|
942 |
|
|
943 |
|
|
944 |
|
|
945 |
\setkeys{Gin}{width=.99\linewidth}
|
|
946 |
\begin{figure}
|
|
947 |
\begin{center}
|
|
948 |
<<ff_vs_naga_unr, fig=TRUE, results=hide, echo=FALSE, eval=TRUE >>=
|
|
949 |
print("Vs. Nagarajan for unr...")
|
|
950 |
ff_unr_snep = get_content(paste("results/full/BY_RM_H3K14ac_unr_snep.tab", sep = ""), "table", header=TRUE, stringsAsFactors=FALSE);
|
|
951 |
ff_unr_snep$index_ff=1:length(ff_unr_snep$chr_BY);
|
|
952 |
ff_naga_common_unr_filename = "cache/ff_naga_common_unr.tab"
|
|
953 |
if (!file.exists(ff_naga_common_unr_filename)) {
|
|
954 |
results = apply(t(1:nrow(ff_unr_snep)), 2, function(i) {
|
|
955 |
if (i %% 100 == 1) {print(paste(i, "/", nrow(ff_unr_snep)))}
|
|
956 |
unr_ff = ff_unr_snep[i,]
|
|
957 |
tmp_naga = naga_chr_BY[[paste("chr_BY", unr_ff$chr_BY, sep = "_")]]
|
|
958 |
tmp_naga = tmp_naga[(tmp_naga$V2 >= unr_ff$lower_bound_BY & tmp_naga$V2 <= unr_ff$upper_bound_BY) |
|
|
959 |
(tmp_naga$V3 >= unr_ff$lower_bound_BY & tmp_naga$V3 <= unr_ff$upper_bound_BY) |
|
|
960 |
(tmp_naga$V2 <= unr_ff$lower_bound_BY & tmp_naga$V3 >= unr_ff$upper_bound_BY),]
|
|
961 |
if (length(tmp_naga[,1]) > 0) {
|
|
962 |
overlaps = apply(t(1:length(tmp_naga[,1])), 2, function(tmp_naga_idx) {
|
|
963 |
tmp2_naga = tmp_naga[tmp_naga_idx,]
|
|
964 |
overlap = (min(tmp2_naga$V3, unr_ff$upper_bound_BY) - max(tmp2_naga$V2, unr_ff$lower_bound_BY)) / (tmp2_naga$V3 - tmp2_naga$V2)
|
|
965 |
return(signif(overlap,5))
|
|
966 |
})
|
|
967 |
overlaps = unlist(overlaps)
|
|
968 |
tmp_naga$overlaps = overlaps
|
|
969 |
res = list(index_naga=tmp_naga$index_naga, index_ff=rep(unr_ff$index_ff, length(tmp_naga$index_naga)), overlap=overlaps)
|
|
970 |
return (t(do.call("rbind", res)))
|
|
971 |
} else {
|
|
972 |
return (NULL)
|
|
973 |
}
|
|
974 |
})
|
|
975 |
results = do.call("rbind", results)
|
|
976 |
write.table(results, ff_naga_common_unr_filename, row.names=FALSE, quote=FALSE)
|
|
977 |
}
|
|
978 |
ff_naga_common_unr = get_content(ff_naga_common_unr_filename, "table", head=TRUE);
|
|
979 |
ov = 0.70
|
|
980 |
ff_naga_common_unr = ff_naga_common_unr[ff_naga_common_unr$overlap > ov,]
|
|
981 |
|
|
982 |
fdr = 0.0001
|
|
983 |
ff_naga_common_unr$naga_pval = naga_orig_snep[ff_naga_common_unr$index_naga,]$V15
|
|
984 |
ff_naga_common_unr$ff_pval = ff_unr_snep[ff_naga_common_unr$index_ff,]$pvalsGLM
|
|
985 |
ff_naga_common_unr$naga_len = naga_orig_snep[ff_naga_common_unr$index_naga,]$V3 - naga_orig_snep[ff_naga_common_unr$index_naga,]$V2
|
|
986 |
ff_naga_common_unr$ff_len = ff_unr_snep[ff_naga_common_unr$index_ff,]$upper_bound_BY - ff_unr_snep[ff_naga_common_unr$index_ff,]$lower_bound_BY
|
|
987 |
ff_naga_common_unr$naga_seffect = naga_orig_snep[ff_naga_common_unr$index_naga,]$V14
|
|
988 |
ff_naga_common_unr$ff_seffect = ff_unr_snep[ff_naga_common_unr$index_ff,]$manipMnase_Seq.strainRM
|
|
989 |
naga_unr_snep_pval_thres = FDR(ff_naga_common_unr$naga_pval, fdr)
|
|
990 |
ff_unr_snep_pval_thres = FDR(ff_naga_common_unr$ff_pval, fdr)
|
|
991 |
ff_naga_common_unr$naga_snep = ff_naga_common_unr$naga_pval < naga_unr_snep_pval_thres
|
|
992 |
ff_naga_common_unr$ff_snep = ff_naga_common_unr$ff_pval < ff_unr_snep_pval_thres
|
|
993 |
|
|
994 |
print("Removing Naga nuc that are associated with an ff wp...")
|
|
995 |
ff_naga_common_unr = ff_naga_common_unr[!(ff_naga_common_unr$index_naga %in% ff_naga_common_wp$index_naga),]
|
|
996 |
|
|
997 |
dim (ff_naga_common_unr)
|
|
998 |
|
|
999 |
print("Dealing with duplicated ff index in UNRs...")
|
|
1000 |
all_ff_indexes = unique(ff_naga_common_unr$index_ff)
|
|
1001 |
duplicated_ff_indexes = unique(ff_naga_common_unr[duplicated(ff_naga_common_unr$index_ff),]$index_ff)
|
|
1002 |
unique_ff_indexes = all_ff_indexes[!(all_ff_indexes %in% duplicated_ff_indexes)]
|
|
1003 |
duplicated_ff_naga_common_unr_filename = "cache/duplicated_ff_naga_common_unr.tab"
|
|
1004 |
if (!file.exists(duplicated_ff_naga_common_unr_filename)) {
|
|
1005 |
foo = apply(t(1:length(duplicated_ff_indexes)), 2, function(i){
|
|
1006 |
if (i %% 100 == 1) {print(paste(i, "/", length(duplicated_ff_indexes)))}
|
|
1007 |
tmp_duplicated_ff_indexes = ff_naga_common_unr[ff_naga_common_unr$index_ff == duplicated_ff_indexes[i],]
|
|
1008 |
if ( length(tmp_duplicated_ff_indexes[,1]) > 0 ) {
|
|
1009 |
the_dup = tmp_duplicated_ff_indexes[tmp_duplicated_ff_indexes$naga_pval == min(tmp_duplicated_ff_indexes$naga_pval),][1,]
|
|
1010 |
return(the_dup)
|
|
1011 |
} else {
|
|
1012 |
return(NULL)
|
|
1013 |
}
|
|
1014 |
})
|
|
1015 |
duplicated_ff_naga_common_unr = do.call(rbind, foo)
|
|
1016 |
duplicated_ff_naga_common_unr = data.frame(lapply(data.frame(duplicated_ff_naga_common_unr, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE)
|
|
1017 |
write.table(duplicated_ff_naga_common_unr, file = duplicated_ff_naga_common_unr_filename, sep = "\t",row.names = FALSE, quote=FALSE);
|
|
1018 |
}
|
|
1019 |
duplicated_ff_naga_common_unr = read.table(duplicated_ff_naga_common_unr_filename, header=TRUE)
|
|
1020 |
unique_ff_naga_common_unr = ff_naga_common_unr[ff_naga_common_unr$index_ff %in% unique_ff_indexes, ]
|
|
1021 |
ff_naga_common_unr = rbind(unique_ff_naga_common_unr, duplicated_ff_naga_common_unr)
|
|
1022 |
|
|
1023 |
ff_naga_common_unr = ff_naga_common_unr[ff_naga_common_unr$ff_len < (6*150),]
|
|
1024 |
|
|
1025 |
|
|
1026 |
print("common_ff...")
|
|
1027 |
nb_ff_naga_common_unr = nrow(ff_naga_common_unr)
|
|
1028 |
ff_naga_common_unr_snep = ff_naga_common_unr[ff_naga_common_unr$ff_snep & ff_naga_common_unr$naga_snep,]
|
|
1029 |
nb_ff_naga_common_unr_snep = nrow(ff_naga_common_unr_snep)
|
|
1030 |
naga_unr_snep = ff_naga_common_unr[ff_naga_common_unr$naga_snep, ]
|
|
1031 |
nb_naga_unr_snep = nrow(naga_unr_snep)
|
|
1032 |
ff_unr_snep = ff_naga_common_unr[ff_naga_common_unr$ff_snep,]
|
|
1033 |
nb_ff_unr_snep = nrow(ff_unr_snep)
|
|
1034 |
|
|
1035 |
layout(matrix(1:2, nrow=1), respect=TRUE)
|
|
1036 |
# hist(ff_naga_common_unr$overlap, nclass=70, xlab = "overlap", main = "overlap between ff naga common unr")
|
|
1037 |
hist(ff_naga_common_unr_snep$ff_len, nclass=200)
|
|
1038 |
hist(ff_naga_common_unr_snep$naga_len)
|
|
1039 |
@
|
|
1040 |
\end{center}
|
|
1041 |
\caption{UNR positionning}
|
|
1042 |
\end{figure}
|
|
1043 |
|
|
1044 |
|
|
1045 |
|
|
1046 |
|
|
1047 |
Percentage of Naga SNEPs among all nucs:
|
|
1048 |
$$\frac{\Sexpr{nb_naga_wp_snep + nb_naga_unr_snep}}{\Sexpr{nrow(ff_naga_common_wp) + nrow(ff_naga_common_unr)}} = \Sexpr{signif((nb_naga_wp_snep + nb_naga_unr_snep)/(nrow(ff_naga_common_wp) + nrow(ff_naga_common_unr)) * 100, 3)}\%$$
|
|
1049 |
|
|
1050 |
Percentage of FF SNEPs among all nucs:
|
|
1051 |
$$\frac{\Sexpr{nb_ff_wp_snep + nb_ff_unr_snep}}{\Sexpr{nrow(ff_naga_common_wp) + nrow(ff_naga_common_unr)}} = \Sexpr{signif((nb_ff_wp_snep + nb_ff_unr_snep)/(nrow(ff_naga_common_wp) + nrow(ff_naga_common_unr)) * 100, 3)}\%$$
|
|
1052 |
|
|
1053 |
Percentage of wp FF SNEPs that were SNEP in Naga:
|
|
1054 |
$$\frac{\Sexpr{nb_ff_naga_common_wp_snep}}{\Sexpr{nb_ff_wp_snep}} = \Sexpr{signif(nb_ff_naga_common_wp_snep/nb_ff_wp_snep * 100, 3)}\%$$
|
|
1055 |
|
|
1056 |
Percentage of unr FF SNEPs that were SNEP in Naga:
|
|
1057 |
$$\frac{\Sexpr{nb_ff_naga_common_unr_snep}}{\Sexpr{nb_ff_unr_snep}} = \Sexpr{signif(nb_ff_naga_common_unr_snep/nb_ff_unr_snep * 100, 3)}\%$$
|
|
1058 |
|
|
1059 |
Percentage of FF SNEPs that were SNEP in Naga:
|
|
1060 |
$$\frac{\Sexpr{nb_ff_naga_common_wp_snep + nb_ff_naga_common_unr_snep}}{\Sexpr{nb_ff_wp_snep + nb_ff_unr_snep}} = \Sexpr{signif((nb_ff_naga_common_wp_snep + nb_ff_naga_common_unr_snep)/(nb_ff_wp_snep + nb_ff_unr_snep) * 100, 3)}\%$$
|
|
1061 |
|
|
1062 |
Percentage of Naga SNEPs that are either a wp or unr SNEP in FF:
|
|
1063 |
$$\frac{\Sexpr{nb_ff_naga_common_wp_snep + nb_ff_naga_common_unr_snep}}{\Sexpr{nb_naga_wp_snep + nb_naga_unr_snep}} = \Sexpr{signif((nb_ff_naga_common_wp_snep + nb_ff_naga_common_unr_snep)/(nb_naga_wp_snep + nb_naga_unr_snep) * 100, 3)}\%$$
|
|
1064 |
|
|
1065 |
|
|
1066 |
\subsection{SNEP effect correlation}
|
|
1067 |
|
|
1068 |
|
|
1069 |
\setkeys{Gin}{width=.99\linewidth}
|
|
1070 |
\begin{figure}
|
|
1071 |
\begin{center}
|
|
1072 |
\subfigure{
|
|
1073 |
<< ff_vs_naga_cov_1, results=hide, fig=TRUE, height=5, width=16, echo=FALSE, eval=TRUE >>=
|
|
1074 |
cur_index = 2
|
|
1075 |
cur = as.list(curs[cur_index,])
|
|
1076 |
cur$begin = 168850
|
|
1077 |
cur$end = 170200
|
|
1078 |
foo = get_content("data/TF/sample_1_TF.txt", "table", header=TRUE)
|
|
1079 |
by_mnase_seq_cov = foo[foo$V1 == 1 & foo$V2 > cur$begin & foo$V2 < cur$end, ]
|
|
1080 |
f = by_mnase_seq_cov[by_mnase_seq_cov$V3 == "F",]
|
|
1081 |
f_exp = rep(0, cur$end - cur$begin + 1)
|
|
1082 |
f_exp[f$V2 - cur$begin + 1] = f$V4
|
|
1083 |
r = by_mnase_seq_cov[by_mnase_seq_cov$V3 == "R",]
|
|
1084 |
r_exp = rep(0, cur$end - cur$begin + 1)
|
|
1085 |
r_exp[r$V2 - cur$begin + 1] = r$V4
|
|
1086 |
plot(0,0, col=0, xlim=c(cur$begin, cur$end), ylim=c(-150,100), ylab="MNase-seq")
|
|
1087 |
# lines(cur$begin:cur$end, f_exp)
|
|
1088 |
# lines(cur$begin:cur$end, -r_exp)
|
|
1089 |
abline(h=0)
|
|
1090 |
|
|
1091 |
|
|
1092 |
by_mnase_seq_cov = foo[foo$V1 == 1 & foo$V2 > cur$begin - 50 & foo$V2 < cur$end + 50, ]
|
|
1093 |
f = by_mnase_seq_cov[by_mnase_seq_cov$V3 == "F",]
|
|
1094 |
f_exp = rep(0, cur$end - cur$begin + 1)
|
|
1095 |
for (i in 1:nrow(f)) {
|
|
1096 |
l = f[i,]
|
|
1097 |
if (l$V2 <= cur$end) {
|
|
1098 |
b = min(cur$end-cur$begin+1,max(1,l$V2-cur$begin+1))
|
|
1099 |
e = min(cur$end-cur$begin+1,l$V2-cur$begin+1+50)
|
|
1100 |
# print(paste(l$V2 - cur$begin + 1, b,":", e))
|
|
1101 |
f_exp[b:e] = f_exp[b:e] + l$V4
|
|
1102 |
}
|
|
1103 |
}
|
|
1104 |
lines(cur$begin:cur$end, f_exp, lwd=3)
|
|
1105 |
|
|
1106 |
r = by_mnase_seq_cov[by_mnase_seq_cov$V3 == "R",]
|
|
1107 |
r_exp = rep(0, cur$end - cur$begin + 1)
|
|
1108 |
for (i in 1:nrow(r)) {
|
|
1109 |
l = r[i,]
|
|
1110 |
if (l$V2 <= cur$end + 50) {
|
|
1111 |
b = min(cur$end-cur$begin+1,max(1,l$V2-cur$begin+1-50))
|
|
1112 |
e = max(1,min(cur$end-cur$begin+1,l$V2-cur$begin+1))
|
|
1113 |
# print(paste(l$V2 - cur$begin + 1, b,":", e))
|
|
1114 |
r_exp[b:e] = r_exp[b:e] + l$V4
|
|
1115 |
}
|
|
1116 |
}
|
|
1117 |
lines(cur$begin:cur$end, -r_exp, lwd=3)
|
|
1118 |
|
|
1119 |
@
|
|
1120 |
}
|
|
1121 |
\subfigure{
|
|
1122 |
<< ff_vs_naga_cov_2, results=hide, fig=TRUE, height=5, width=16, echo=FALSE, eval=TRUE >>=
|
|
1123 |
by_arrays_normalized_cby01_filename = "data/by_arrays_normalized_cby01.tab"
|
|
1124 |
if (!file.exists(by_arrays_normalized_cby01_filename)) {
|
|
1125 |
foo = get_content("data/BY_arrays_normalized.dat", "table", header=TRUE)
|
|
1126 |
by_arrays_normalized_cby01 = foo[foo$chromosome == "chrI" & foo$position > cur$begin & foo$position < cur$end , c("chromosome", "position", "CBY01")]
|
|
1127 |
write.table(by_arrays_normalized_cby01, by_arrays_normalized_cby01_filename, sep = "\t",row.names = FALSE, quote=FALSE);
|
|
1128 |
}
|
|
1129 |
by_arrays_normalized_cby01 = get_content(by_arrays_normalized_cby01_filename, "table", header=TRUE);
|
|
1130 |
|
|
1131 |
plot(by_arrays_normalized_cby01$position, by_arrays_normalized_cby01$CBY01, type="l", xlim=c(cur$begin, cur$end), lwd=3, xlab="chr I", ylab="MNase-CHIP")
|
|
1132 |
@
|
|
1133 |
}
|
|
1134 |
\subfigure{
|
|
1135 |
<<SNEP_eff_ff2Naga, results=hide, fig=TRUE, echo=FALSE, eval=TRUE >>=
|
|
1136 |
X = c(-ff_naga_common_wp$naga_seffect, -ff_naga_common_unr$naga_seffect)
|
|
1137 |
Y = c(ff_naga_common_wp$ff_seffect, ff_naga_common_unr$ff_seffect)
|
|
1138 |
|
|
1139 |
plot(0,0,col=0, xlim=c(-1.5,1.5), ylim=c(-4,4), xlab = "SNEP effect Naga", ylab="SNEP effect FF")
|
|
1140 |
points(X,Y, col = adjustcolor("darkviolet", alpha.f = 0.03), pch=19)
|
|
1141 |
|
|
1142 |
abline( lm(Y ~ X), lty = 1, lwd = 2, col = "darkviolet" )
|
|
1143 |
legend("topleft",legend= sprintf("Spearman correlation: %.3f",cor(X,Y, method = "spearman")))
|
|
1144 |
|
|
1145 |
@
|
|
1146 |
}
|
|
1147 |
\end{center}
|
|
1148 |
\caption{FF vs Naga}
|
|
1149 |
\label{fig:inf}
|
|
1150 |
\end{figure}
|
|
1151 |
|
|
1152 |
|
|
1153 |
\end{document}
|
|
1154 |
|
|
1155 |
|
|
1156 |
|