124 |
124 |
llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
|
125 |
125 |
llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
|
126 |
126 |
llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
|
127 |
|
ratio = -(llX + llY - llXY)
|
|
127 |
ratio = llX + llY - llXY
|
128 |
128 |
return(ratio)
|
129 |
129 |
### Returns the likelihood ratio.
|
130 |
130 |
}, ex=function(){
|
... | ... | |
267 |
267 |
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
|
268 |
268 |
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
|
269 |
269 |
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}.
|
270 |
|
lod_thres=-20, ##<< Log likelihood ration threshold.
|
|
270 |
lod_thres=20, ##<< Log likelihood ration threshold.
|
271 |
271 |
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
|
272 |
272 |
){
|
273 |
273 |
end_of_tracks = function(tracks) {
|
... | ... | |
311 |
311 |
indexes = c()
|
312 |
312 |
track_readers = c()
|
313 |
313 |
current_nuc = NULL
|
314 |
|
lod_score = lod_thres - 1
|
|
314 |
lod_score = lod_thres + 1
|
315 |
315 |
# Read nucs from TF outputs
|
316 |
316 |
tf_outs = list()
|
317 |
317 |
i = 1
|
... | ... | |
361 |
361 |
}
|
362 |
362 |
# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
|
363 |
363 |
if (is.na(lod_score)) {
|
364 |
|
lod_score = lod_thres - 1
|
|
364 |
lod_score = lod_thres + 1
|
365 |
365 |
}
|
366 |
|
if (lod_score > lod_thres) {
|
367 |
|
# Store lod_score
|
368 |
|
new_nuc$lod_score = lod_score
|
|
366 |
# Store lod_score
|
|
367 |
new_nuc$lod_score = lod_score
|
|
368 |
if (lod_score < lod_thres) {
|
369 |
369 |
# aggregate to current cluster
|
370 |
370 |
# update bound
|
371 |
371 |
if (new_nuc$upper_bound > new_cluster$upper_bound) {
|
... | ... | |
456 |
456 |
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
|
457 |
457 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
|
458 |
458 |
corr_thres=0.5, ##<< Correlation threshold.
|
459 |
|
lod_thres=-100, ##<< LOD cut off.
|
|
459 |
lod_thres=100, ##<< LOD cut off.
|
460 |
460 |
config=NULL, ##<< GLOBAL config variable
|
461 |
461 |
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
|
462 |
462 |
) {
|
... | ... | |
544 |
544 |
lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
|
545 |
545 |
lod_scores = c(lod_scores, lod_score)
|
546 |
546 |
# Filtering on LOD Score
|
547 |
|
if (lod_score > lod_thres) {
|
|
547 |
if (lod_score < lod_thres) {
|
548 |
548 |
tmp_nuc = list()
|
549 |
549 |
# strain_ref1
|
550 |
550 |
tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
|
... | ... | |
1730 |
1730 |
}
|
1731 |
1731 |
}
|
1732 |
1732 |
|
|
1733 |
|
1733 |
1734 |
print(length(rois_3rd_round[,1]))
|
1734 |
1735 |
print(sum(rois_3rd_round$length))
|
1735 |
1736 |
rois = rois_3rd_round
|
... | ... | |
2061 |
2062 |
plot_anovas = FALSE, ##<< Plot (or not) scatter for each nuc.
|
2062 |
2063 |
plot_anova_boxes = FALSE, ##<< Plot (or not) boxplot for each nuc.
|
2063 |
2064 |
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples.
|
|
2065 |
plot_chain = FALSE, ##<< Plot (or not) clusterised nuceosomes between mnase samples.
|
2064 |
2066 |
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
|
2065 |
2067 |
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
|
2066 |
2068 |
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
|
... | ... | |
2196 |
2198 |
}
|
2197 |
2199 |
}
|
2198 |
2200 |
}
|
|
2201 |
|
2199 |
2202 |
# Plot wp nucs
|
2200 |
|
if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs)) {
|
|
2203 |
if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs | plot_chain)) {
|
2201 |
2204 |
if (samples[[1]]$marker == "Mnase_Seq") {
|
2202 |
2205 |
if (is.null(aggregated_intra_strain_nucs)) {
|
2203 |
2206 |
wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
|
|
2207 |
foo <<- wp_nucs
|
2204 |
2208 |
} else {
|
2205 |
2209 |
wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
|
2206 |
2210 |
}
|
2207 |
2211 |
} else {
|
2208 |
2212 |
wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
|
2209 |
2213 |
}
|
2210 |
|
replicates_wp_nucs[[replicate_rank]] = wp_nucs
|
2211 |
|
for (wp_nuc in wp_nucs) {
|
2212 |
|
if (wp_nuc$wp){
|
2213 |
|
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
|
2214 |
|
all_original_reads = c()
|
2215 |
|
for(initial_nuc in wp_nuc$nucs) {
|
2216 |
|
all_original_reads = c(all_original_reads, initial_nuc$original_reads)
|
2217 |
|
}
|
2218 |
|
delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
|
2219 |
|
if (FALSE) {
|
2220 |
|
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
|
2221 |
|
}
|
2222 |
|
if (plot_wp_nuc_model) {
|
2223 |
|
lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
|
2224 |
|
}
|
2225 |
|
}
|
2226 |
|
}
|
|
2214 |
|
|
2215 |
if (plot_chain) {
|
|
2216 |
tf_nucs = lapply(wp_nucs, function(nuc) {
|
|
2217 |
bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
|
|
2218 |
tmp_nuc = tmp_nuc[[1]]
|
|
2219 |
tmp_nuc$inputs = NULL
|
|
2220 |
tmp_nuc$original_reads = NULL
|
|
2221 |
tmp_nuc$wp = nuc$wp
|
|
2222 |
# print(tmp_nuc)
|
|
2223 |
return(tmp_nuc)
|
|
2224 |
})
|
|
2225 |
return(do.call(rbind, bar))
|
|
2226 |
})
|
|
2227 |
tf_nucs = data.frame(do.call(rbind, tf_nucs))
|
|
2228 |
|
|
2229 |
points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4, pch=16, col="white")
|
|
2230 |
points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4)
|
|
2231 |
text((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, 1:nrow(tf_nucs))
|
|
2232 |
|
|
2233 |
xs = (1:nrow(tf_nucs) - 1) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb
|
|
2234 |
points(xs, rep(0, nrow(tf_nucs)), cex=2, pch=16, col="white")
|
|
2235 |
points(xs, rep(0, nrow(tf_nucs)), cex=2, col = unlist(tf_nucs$wp)+2)
|
|
2236 |
text(xs, 0, 1:nrow(tf_nucs), cex=0.5)
|
|
2237 |
xs = (1:nrow(tf_nucs) - 1.5) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb
|
|
2238 |
text(xs, 0, signif(unlist(tf_nucs$lod_score)), cex=0.5, col=(unlist(tf_nucs$lod_score) < 150) + 2)
|
|
2239 |
}
|
|
2240 |
|
|
2241 |
if (plot_wp_nucs | plot_common_nucs ) {
|
|
2242 |
replicates_wp_nucs[[replicate_rank]] = wp_nucs
|
|
2243 |
for (wp_nuc in wp_nucs) {
|
|
2244 |
if (wp_nuc$wp){
|
|
2245 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
|
|
2246 |
all_original_reads = c()
|
|
2247 |
for(initial_nuc in wp_nuc$nucs) {
|
|
2248 |
all_original_reads = c(all_original_reads, initial_nuc$original_reads)
|
|
2249 |
}
|
|
2250 |
delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
|
|
2251 |
if (FALSE) {
|
|
2252 |
rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
|
|
2253 |
}
|
|
2254 |
if (plot_wp_nuc_model) {
|
|
2255 |
lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
|
|
2256 |
}
|
|
2257 |
}
|
|
2258 |
}
|
|
2259 |
}
|
2227 |
2260 |
}
|
2228 |
2261 |
}
|
2229 |
2262 |
|