Révision 729c934e src/R/nucleominer.R
b/src/R/nucleominer.R | ||
---|---|---|
124 | 124 |
llX = sum(log(dnorm(x,mean=meanX,sd=sdX))) |
125 | 125 |
llY = sum(log(dnorm(y,mean=meanY,sd=sdY))) |
126 | 126 |
llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY))) |
127 |
ratio = -(llX + llY - llXY)
|
|
127 |
ratio = llX + llY - llXY
|
|
128 | 128 |
return(ratio) |
129 | 129 |
### Returns the likelihood ratio. |
130 | 130 |
}, ex=function(){ |
... | ... | |
267 | 267 |
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. |
268 | 268 |
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample. |
269 | 269 |
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}. |
270 |
lod_thres=-20, ##<< Log likelihood ration threshold.
|
|
270 |
lod_thres=20, ##<< Log likelihood ration threshold. |
|
271 | 271 |
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound. |
272 | 272 |
){ |
273 | 273 |
end_of_tracks = function(tracks) { |
... | ... | |
311 | 311 |
indexes = c() |
312 | 312 |
track_readers = c() |
313 | 313 |
current_nuc = NULL |
314 |
lod_score = lod_thres - 1
|
|
314 |
lod_score = lod_thres + 1
|
|
315 | 315 |
# Read nucs from TF outputs |
316 | 316 |
tf_outs = list() |
317 | 317 |
i = 1 |
... | ... | |
361 | 361 |
} |
362 | 362 |
# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" ")) |
363 | 363 |
if (is.na(lod_score)) { |
364 |
lod_score = lod_thres - 1
|
|
364 |
lod_score = lod_thres + 1
|
|
365 | 365 |
} |
366 |
if (lod_score > lod_thres) {
|
|
367 |
# Store lod_score
|
|
368 |
new_nuc$lod_score = lod_score
|
|
366 |
# Store lod_score
|
|
367 |
new_nuc$lod_score = lod_score
|
|
368 |
if (lod_score < lod_thres) {
|
|
369 | 369 |
# aggregate to current cluster |
370 | 370 |
# update bound |
371 | 371 |
if (new_nuc$upper_bound > new_cluster$upper_bound) { |
... | ... | |
456 | 456 |
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed. |
457 | 457 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed. |
458 | 458 |
corr_thres=0.5, ##<< Correlation threshold. |
459 |
lod_thres=-100, ##<< LOD cut off.
|
|
459 |
lod_thres=100, ##<< LOD cut off. |
|
460 | 460 |
config=NULL, ##<< GLOBAL config variable |
461 | 461 |
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed. |
462 | 462 |
) { |
... | ... | |
544 | 544 |
lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2) |
545 | 545 |
lod_scores = c(lod_scores, lod_score) |
546 | 546 |
# Filtering on LOD Score |
547 |
if (lod_score > lod_thres) {
|
|
547 |
if (lod_score < lod_thres) {
|
|
548 | 548 |
tmp_nuc = list() |
549 | 549 |
# strain_ref1 |
550 | 550 |
tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr |
... | ... | |
1730 | 1730 |
} |
1731 | 1731 |
} |
1732 | 1732 |
|
1733 |
|
|
1733 | 1734 |
print(length(rois_3rd_round[,1])) |
1734 | 1735 |
print(sum(rois_3rd_round$length)) |
1735 | 1736 |
rois = rois_3rd_round |
... | ... | |
2061 | 2062 |
plot_anovas = FALSE, ##<< Plot (or not) scatter for each nuc. |
2062 | 2063 |
plot_anova_boxes = FALSE, ##<< Plot (or not) boxplot for each nuc. |
2063 | 2064 |
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples. |
2065 |
plot_chain = FALSE, ##<< Plot (or not) clusterised nuceosomes between mnase samples. |
|
2064 | 2066 |
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. |
2065 | 2067 |
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed. |
2066 | 2068 |
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis. |
... | ... | |
2196 | 2198 |
} |
2197 | 2199 |
} |
2198 | 2200 |
} |
2201 |
|
|
2199 | 2202 |
# Plot wp nucs |
2200 |
if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs)) { |
|
2203 |
if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs | plot_chain)) {
|
|
2201 | 2204 |
if (samples[[1]]$marker == "Mnase_Seq") { |
2202 | 2205 |
if (is.null(aggregated_intra_strain_nucs)) { |
2203 | 2206 |
wp_nucs = aggregate_intra_strain_nucs(samples)[[1]] |
2207 |
foo <<- wp_nucs |
|
2204 | 2208 |
} else { |
2205 | 2209 |
wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]] |
2206 | 2210 |
} |
2207 | 2211 |
} else { |
2208 | 2212 |
wp_nucs = replicates_wp_nucs[[replicate_rank-2]] |
2209 | 2213 |
} |
2210 |
replicates_wp_nucs[[replicate_rank]] = wp_nucs |
|
2211 |
for (wp_nuc in wp_nucs) { |
|
2212 |
if (wp_nuc$wp){ |
|
2213 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1) |
|
2214 |
all_original_reads = c() |
|
2215 |
for(initial_nuc in wp_nuc$nucs) { |
|
2216 |
all_original_reads = c(all_original_reads, initial_nuc$original_reads) |
|
2217 |
} |
|
2218 |
delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound |
|
2219 |
if (FALSE) { |
|
2220 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1) |
|
2221 |
} |
|
2222 |
if (plot_wp_nuc_model) { |
|
2223 |
lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1) |
|
2224 |
} |
|
2225 |
} |
|
2226 |
} |
|
2214 |
|
|
2215 |
if (plot_chain) { |
|
2216 |
tf_nucs = lapply(wp_nucs, function(nuc) { |
|
2217 |
bar = apply(t(nuc$nucs), 2, function(tmp_nuc){ |
|
2218 |
tmp_nuc = tmp_nuc[[1]] |
|
2219 |
tmp_nuc$inputs = NULL |
|
2220 |
tmp_nuc$original_reads = NULL |
|
2221 |
tmp_nuc$wp = nuc$wp |
|
2222 |
# print(tmp_nuc) |
|
2223 |
return(tmp_nuc) |
|
2224 |
}) |
|
2225 |
return(do.call(rbind, bar)) |
|
2226 |
}) |
|
2227 |
tf_nucs = data.frame(do.call(rbind, tf_nucs)) |
|
2228 |
|
|
2229 |
points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4, pch=16, col="white") |
|
2230 |
points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4) |
|
2231 |
text((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, 1:nrow(tf_nucs)) |
|
2232 |
|
|
2233 |
xs = (1:nrow(tf_nucs) - 1) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb |
|
2234 |
points(xs, rep(0, nrow(tf_nucs)), cex=2, pch=16, col="white") |
|
2235 |
points(xs, rep(0, nrow(tf_nucs)), cex=2, col = unlist(tf_nucs$wp)+2) |
|
2236 |
text(xs, 0, 1:nrow(tf_nucs), cex=0.5) |
|
2237 |
xs = (1:nrow(tf_nucs) - 1.5) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb |
|
2238 |
text(xs, 0, signif(unlist(tf_nucs$lod_score)), cex=0.5, col=(unlist(tf_nucs$lod_score) < 150) + 2) |
|
2239 |
} |
|
2240 |
|
|
2241 |
if (plot_wp_nucs | plot_common_nucs ) { |
|
2242 |
replicates_wp_nucs[[replicate_rank]] = wp_nucs |
|
2243 |
for (wp_nuc in wp_nucs) { |
|
2244 |
if (wp_nuc$wp){ |
|
2245 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1) |
|
2246 |
all_original_reads = c() |
|
2247 |
for(initial_nuc in wp_nuc$nucs) { |
|
2248 |
all_original_reads = c(all_original_reads, initial_nuc$original_reads) |
|
2249 |
} |
|
2250 |
delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound |
|
2251 |
if (FALSE) { |
|
2252 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1) |
|
2253 |
} |
|
2254 |
if (plot_wp_nuc_model) { |
|
2255 |
lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1) |
|
2256 |
} |
|
2257 |
} |
|
2258 |
} |
|
2259 |
} |
|
2227 | 2260 |
} |
2228 | 2261 |
} |
2229 | 2262 |
|
Formats disponibles : Unified diff