/src/R/nucleominer.R - Diff - NucleoMiner - Forge du Centre Blaise Pascal

Révision 7e2d37e1 src/R/nucleominer.R

     lod_score_vecs = structure(function # Likelihood ratio
     ### Compute the likelihood log of two set of value from two models Vs. a unique model.
     llr_score_nvecs = structure(function # Likelihood ratio
     ### Compute the log likelihood ratio of two or more sets of values.
+    (
     x ,##<< First vector.
     y ##<< Second vector.
       xs ##<< list of vectors.
     ) {
     	if (length(x) <=1 | length(y) <= 1) {
     		return(NA)
+    	}
       meanX = mean(x)
       sdX = sd(x)
       meanY = mean(y)
       sdY = sd(y)
       meanXY = mean(c(x,y))
       sdXY = sd(c(x,y))
       llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
       llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
       llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
       ratio = llX + llY - llXY
       l = length(xs)
       if (l < 1) {
         return(NA)
+      }
       if (l == 1) {
         return(1)
+      }
       sumllX = 0
       for (i in 1:l) {
         x = xs[[i]]
       	if (length(x) <= 1) {
       		return(NA)
+      	}
         meanX = mean(x)
         sdX = sd(x)
         llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
         sumllX = sumllX + llX
+      }
       meanXglo = mean(unlist(xs))
       sdXglo = sd(unlist(xs))
       llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
       ratio = sumllX - llXYZ
       return(ratio)
     ### Returns the likelihood ratio.
     ### Returns the log likelihood ratio.
     }, ex=function(){
       # LOD score for 2 set of values
       mean1=5; sd1=2; card2 = 250
-...
       lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
       lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
       lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
       lod_score_vecs(x1,x2)
       llr_score_nvecs(list(x1,x2))
      })
     dfadd = structure(function# Adding list to a dataframe.
-...
+      }
       tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
       tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
       tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]
       tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]
       tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
       tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
       i = 1
       while (i <= length(tf_outputs[,1])) {
         lb = tf_outputs[i,]$low
         ub = tf_outputs[i,]$up
         tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
         lb = tf_outputs[i,]$lower_bound
         ub = tf_outputs[i,]$upper_bound
         tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
         i = i+1
+      }
       return(tf_outputs)
-...
     aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
     ### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
     ### This function aggregates nucleosomes for replicated samples. It uses the TemplateFilter output of each sample as a replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adjacent nucleosomes are compared two by two. The comparison is based on a log likelihood ratio score. The outcome of the comparison is either the merging or the separation of the adjacent nucleosomes. Finally, the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster owns an attribute \emph{wp} for "well positioned". This attribute is set to \emph{TRUE} if the cluster is composed of exactly one nucleosome from each sample.
     samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
     lod_thres=20, ##<< Log likelihood ration threshold.
     llr_thres=20, ##<< Log likelihood ratio threshold.
     coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
     ){
     	end_of_tracks = function(tracks) {
-...
     		return(clusters)
+    	}
     	strain = samples[[1]]$strain
     	lod_scores = c()
     	llr_scores = c()
       min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
     	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
       # compute clusters
-...
       indexes = c()
       track_readers = c()
       current_nuc = NULL
     	lod_score = lod_thres + 1
     	llr_score = llr_thres + 1
       # Read nucs from TF outputs
       tf_outs = list()
     	i = 1
-...
         new_upper_bound = new_nuc$upper_bound
         if (!is.null(current_nuc)) {
     			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
     			lod_scores = c(lod_scores,lod_score)
     			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
     			llr_scores = c(llr_scores,llr_score)
+    		}
     		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
     		if (is.na(lod_score)) {
     			lod_score = lod_thres + 1
     		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
     		if (is.na(llr_score)) {
     			llr_score = llr_thres + 1
+    		}
     		# Store lod_score
     		new_nuc$lod_score = lod_score
     	  if (lod_score < lod_thres) {
     		# Store llr_score
     		new_nuc$llr_score = llr_score
     	  if (llr_score < llr_thres) {
           # aggregate to current cluster
           #   update bound
           if (new_nuc$upper_bound > new_cluster$upper_bound) {
-...
         # store old cluster
         clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
+      }
     	return(list(clusters, lod_scores))
     ### Returns a list of clusterized nucleosomes, and all computed lod scores.
     	return(list(clusters, llr_scores))
     ### Returns a list of clusterized nucleosomes, and all computed llr scores.
     }, ex=function(){
     	# Dealing with a region of interest
     	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
-...
     			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
     			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
     			if (tmp_nuc$wp) {
     				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
     				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
     				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
     				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
     			} else {
     				tmp_nuc_as_list[["lod_1"]] = NA
     				tmp_nuc_as_list[["lod_2"]] = NA
     				tmp_nuc_as_list[["llr_1"]] = NA
     				tmp_nuc_as_list[["llr_2"]] = NA
+    			}
           return(tmp_nuc_as_list)
         })
-...
     wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
     wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
     corr_thres=0.5, ##<< Correlation threshold.
     lod_thres=100, ##<< LOD cut off.
     llr_thres=100, ##<< Log likelihood ratio cut off.
     config=NULL, ##<< GLOBAL config variable
     ... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
     ) {
-...
     		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
+    	}
     	common_nuc = NULL
     	lod_scores = c()
     	llr_scores = c()
     	chr = replicates[[1]][[1]]$roi$chr
       min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
     	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
-...
     								# tranlation of reads into strain 2 coords
     								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
     								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
     								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
     								lod_scores = c(lod_scores, lod_score)
     								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
     								llr_scores = c(llr_scores, llr_score)
     								# Filtering on LOD Score
     								if (lod_score < lod_thres) {
     								if (llr_score < llr_thres) {
     									tmp_nuc = list()
     									# strain_ref1
     									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
-...
     									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
     									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
     									# common
     									tmp_nuc[["lod_score"]] = signif(lod_score,5)
     									tmp_nuc[["llr_score"]] = signif(llr_score,5)
     									# print(tmp_nuc)
     									common_nuc = dfadd(common_nuc, tmp_nuc)
+    								}
-...
     			common_nuc = common_nuc[-to_remove_list,]
+    		}
     		return(list(common_nuc, lod_scores))
     		return(list(common_nuc, llr_scores))
     	} else {
     		print("WARNING, no nucs for strain_ref1.")
     		return(NULL)
+    	}
     ### Returns a list of clusterized nucleosomes, and all computed lod scores.
     ### Returns a list of clusterized nucleosomes, and all computed llr scores.
     }, ex=function(){
         # Define new translate_cur function...
-...
             tmp_x_next = tmp_x[-1]
             need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
             tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
             tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
             tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
             new_tmp_x = c()
             new_tmp_y = c()
             index_odd = 1:length(tmp_x) * 2 - 1
-...
             } else {
               pos = config$LEGEND_LOD_POS
+            }
             col_lod = sapply(tmp_lod_inter, function(lod){if (lod < 20 ) return("green") else return("red")})
             text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, cex=1.5, pos=pos, col=col_lod)
             col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
             text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr)
+          }

Formats disponibles : Unified diff

LBMC » NucleoMiner

Révision 7e2d37e1 src/R/nucleominer.R