Révision 729c934e src/R/nucleominer.R

b/src/R/nucleominer.R
124 124
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125 125
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126 126
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127
  ratio = -(llX + llY - llXY)
127
  ratio = llX + llY - llXY
128 128
  return(ratio)
129 129
### Returns the likelihood ratio.
130 130
}, ex=function(){
......
267 267
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. 
268 268
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269 269
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270
lod_thres=-20, ##<< Log likelihood ration threshold.
270
lod_thres=20, ##<< Log likelihood ration threshold.
271 271
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272 272
){
273 273
	end_of_tracks = function(tracks) {
......
311 311
  indexes = c()
312 312
  track_readers = c()
313 313
  current_nuc = NULL
314
	lod_score = lod_thres - 1 
314
	lod_score = lod_thres + 1 
315 315
  # Read nucs from TF outputs  
316 316
  tf_outs = list()
317 317
	i = 1
......
361 361
		}
362 362
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363 363
		if (is.na(lod_score)) {
364
			lod_score = lod_thres - 1 
364
			lod_score = lod_thres + 1 
365 365
		}
366
	  if (lod_score > lod_thres) {
367
			# Store lod_score
368
			new_nuc$lod_score = lod_score 			
366
		# Store lod_score
367
		new_nuc$lod_score = lod_score 			
368
	  if (lod_score < lod_thres) {
369 369
      # aggregate to current cluster
370 370
      #   update bound
371 371
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
......
456 456
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
457 457
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
458 458
corr_thres=0.5, ##<< Correlation threshold.
459
lod_thres=-100, ##<< LOD cut off.
459
lod_thres=100, ##<< LOD cut off.
460 460
config=NULL, ##<< GLOBAL config variable
461 461
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.	
462 462
) {
......
544 544
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
545 545
								lod_scores = c(lod_scores, lod_score)
546 546
								# Filtering on LOD Score						
547
								if (lod_score > lod_thres) {
547
								if (lod_score < lod_thres) {
548 548
									tmp_nuc = list()
549 549
									# strain_ref1
550 550
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
......
1730 1730
		}
1731 1731
	}
1732 1732

  
1733

  
1733 1734
	print(length(rois_3rd_round[,1]))
1734 1735
	print(sum(rois_3rd_round$length))
1735 1736
	rois = rois_3rd_round
......
2061 2062
plot_anovas = FALSE,  ##<< Plot (or not) scatter for each nuc.
2062 2063
plot_anova_boxes =  FALSE,  ##<< Plot (or not) boxplot for each nuc.
2063 2064
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
2065
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
2064 2066
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. 
2065 2067
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
2066 2068
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
......
2196 2198
		    }
2197 2199
			}
2198 2200
	  }
2201
    
2199 2202
	  # Plot wp nucs
2200
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs)) {
2203
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_common_nucs | plot_chain)) {
2201 2204
			if (samples[[1]]$marker == "Mnase_Seq") {
2202 2205
				if (is.null(aggregated_intra_strain_nucs)) {
2203 2206
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]			
2207
          foo <<- wp_nucs
2204 2208
				} else {
2205 2209
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2206 2210
				}
2207 2211
		  } else {
2208 2212
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2209 2213
		  }
2210
			replicates_wp_nucs[[replicate_rank]] = wp_nucs
2211
			for (wp_nuc in wp_nucs) {
2212
				if (wp_nuc$wp){
2213
					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2214
					all_original_reads = c()
2215
					for(initial_nuc in wp_nuc$nucs) {
2216
						all_original_reads = c(all_original_reads, initial_nuc$original_reads)						
2217
					}
2218
					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2219
					if (FALSE) {
2220
					  rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
2221
				  }
2222
					if (plot_wp_nuc_model) {
2223
					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)				
2224
				  }
2225
				}
2226
			}
2214
      
2215
      if (plot_chain) {
2216
        tf_nucs = lapply(wp_nucs, function(nuc) {      
2217
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2218
            tmp_nuc = tmp_nuc[[1]]
2219
            tmp_nuc$inputs = NULL
2220
            tmp_nuc$original_reads = NULL
2221
            tmp_nuc$wp = nuc$wp
2222
            # print(tmp_nuc)
2223
            return(tmp_nuc)
2224
            }) 
2225
            return(do.call(rbind, bar))
2226
          })
2227
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2228

  
2229
        points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4, pch=16, col="white")
2230
        points((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, cex=4)
2231
        text((unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2 , y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank, 1:nrow(tf_nucs))
2232

  
2233
        xs = (1:nrow(tf_nucs) - 1) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb
2234
        points(xs, rep(0, nrow(tf_nucs)), cex=2, pch=16, col="white")
2235
        points(xs, rep(0, nrow(tf_nucs)), cex=2, col = unlist(tf_nucs$wp)+2)
2236
        text(xs, 0, 1:nrow(tf_nucs), cex=0.5)
2237
        xs = (1:nrow(tf_nucs) - 1.5) * (x_max_glo - x_min_glo) / (nrow(tf_nucs)) + lb
2238
        text(xs, 0, signif(unlist(tf_nucs$lod_score)), cex=0.5, col=(unlist(tf_nucs$lod_score) < 150) + 2)  
2239
      }
2240
  
2241
      if (plot_wp_nucs | plot_common_nucs ) {
2242
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2243
  			for (wp_nuc in wp_nucs) {
2244
  				if (wp_nuc$wp){
2245
  					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2246
  					all_original_reads = c()
2247
  					for(initial_nuc in wp_nuc$nucs) {
2248
  						all_original_reads = c(all_original_reads, initial_nuc$original_reads)						
2249
  					}
2250
  					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2251
  					if (FALSE) {
2252
  					  rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col="#EEEEEE", border=1)
2253
  				  }
2254
  					if (plot_wp_nuc_model) {
2255
  					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)				
2256
  				  }
2257
  				}
2258
  			}
2259
      }
2227 2260
		}
2228 2261
	}	
2229 2262
  

Formats disponibles : Unified diff