/src/R/nucleominer.R - Diff - NucleoMiner - Forge du Centre Blaise Pascal

Révision e5603c3f src/R/nucleominer.R

       return(ratio)
     ### Returns the log likelihood ratio.
     }, ex=function(){
       # LOD score for 2 set of values
       # LLR score for 2 sets of values
       mean1=5; sd1=2; card2 = 250
       mean2=6; sd2=3; card1 = 200
       x1 = rnorm(card1, mean1, sd1)
-...
     aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
     ### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
     ### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter output of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is built across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). Depending on the LLR1 value, nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster owns an attribute wp for “well positioned”. This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
     samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
     llr_thres=20, ##<< Log likelihood ration threshold.
     llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
     coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
     ){
     	end_of_tracks = function(tracks) {
-...
+    }
     align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
     ### This function aligns nucs between two strains for a given genome region.
     ### This function aligns nucleosomes between two strains for a given genome region.
     replicates, ##<< Set of replicates, ideally 3 per strain.
     wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
     wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
     wp_nucs_strain_ref1=NULL, ##<< List of aggregated nucleosomes for strain 1. If it's NULL this list will be computed.
     wp_nucs_strain_ref2=NULL, ##<< List of aggregated nucleosomes for strain 2. If it's NULL this list will be computed.
     corr_thres=0.5, ##<< Correlation threshold.
     llr_thres=100, ##<< LOD cut off.
     llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
     config=NULL, ##<< GLOBAL config variable
     ... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
     ) {
-...
     								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
     								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
     								llr_scores = c(llr_scores, llr_score)
     								# Filtering on LOD Score
     								# Filtering on LLR Score
                     if (llr_score < llr_thres) {
     									tmp_nuc = list()
     									# strain_ref1
-...
       return(non_inter_fuzzy)
+    }
     union_regions = function(# Aggregate regions that intersect themnselves.
     union_regions = function(# Aggregate regions that intersect themselves.
     ### This function sorts regions by lower bound to detect those that intersect: the lower bound of each region is compared with the upper bound of the previous one. It embeds a while loop that breaks once the regions list becomes stable.
     regions ##<< The Regions to be aggregated
     ) {
-...
     # }
     translate_regions = function(# Translate a list of regions from a strain ref to another.
     ### This function is an eloborated call to translate_cur.
     ### This function is an elaborated call to translate_cur.
     regions, ##<< Regions to be translated.
     combi, ##<< Combination of strains.
     cur_index, ##<< The region of interest index.
-...
       return(all_reads)
+    }
     get_design = function(# Build the design for deseq
     get_design = function(# Build the design for DESeq
     ### This function builds the design according to sample properties.
     marker, ##<< The marker to consider.
     combi, ##<< The strain combination to consider.
-...
+    }
     plot_dist_samples = function(# Plot the distribution of reads.
     ### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
     ### This function uses the DESeq normalization feature to qualitatively compare the distributions.
     strain, ##<< The strain to consider.
     marker, ##<< The marker to consider.
     res, ##<< Data
-...
       legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
+    }
     analyse_design = function(# Launch deseq methods.
     ### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
     snep_design, ##<< The design to considere.
     reads ##<< The data to considere.
     analyse_design = function(# Launch DESeq methods.
     ### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
     snep_design, ##<< The design to consider.
     reads ##<< The data to consider.
     ) {
     	snep_count_table = reads[, rownames(snep_design)]
     	cdsFull = newCountDataSet(snep_count_table, snep_design)
-...
     combi, ##<< The strain combination involved.
     form, ##<< the nuc form involved.
     all_samples, ##<< Global list of samples.
     FDR = 0.0001, ##<< the specific False Discovery Rate
     config=NULL ##<< GLOBAL config variable
     ) {
       # PRETREAT
-...
     	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
     	snep_design = tmp_analyse[[3]]
       # print(snep_design)
     	fdr = 0.0001
     	thres = FDR(reads$pvalsGLM, fdr)
     	thres = FDR(reads$pvalsGLM, FDR)
     	reads$snep_index = reads$pvalsGLM < thres
     	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
       return(reads)
-...
     ROM2ARAB = function(# Roman to Arabic pair list.
     ### Util to convert Roman to Arabic
     ### Utility to convert Roman numbers into Arabic numbers
     ){list(
       "I" = 1,
       "II" = 2,
-...
     })
     ARAB2ROM = function(# Arabic to Roman pair list.
     ### Utility to convert Arabic numbers to Roman numbers. The result is obtained by passing the ROM2ARAB() pair list to switch_pairlist.
     ){
       roman_to_arabic = ROM2ARAB()
       switch_pairlist(roman_to_arabic)
     }
     c2c_extraction = function(# Extract a sub part of the corresponding c2c file
     ### This fonction allow to acces to a specific part of the c2c file.
     ### This function gives access to a specific part of the c2c file.
     strain1, ##<< the key strain
     strain2, ##<< the target strain
     chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
     lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
     upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
     chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
     lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
     upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
     config=NULL##<<  GLOBAL config variable
     ) {
       reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
-...
+    }
     build_replicates = structure(function(# Stage replicates data
     ### This function loads in memory data corresponding to the given experiments.
     expe, ##<< a list of vector corresponding to vector of replicates.
     ### This function loads in memory the data corresponding to the given experiments.
     expe, ##<< a list of vectors corresponding to replicates.
     roi, ##<< the region that we are interested in.
     only_fetch=FALSE, ##<< filter or not inputs.
     get_genome=FALSE,##<< Load or not corresponding genome.
-...
         # library(nucleominer)
+        #
         # # Read config file
         # json_conf_file = "nucleo_miner_config.json"
         # json_conf_file = "nucleominer_config.json"
         # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
         # # Read sample file
         # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
-...
             tmp_track_prev = tmp_track[-length(tmp_track)]
             tmp_track_next = tmp_track[-1]
             # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
             if (is.null(config$TRACK_LOD_OFFSET)) {
               config$TRACK_LOD_OFFSET = 0
             if (is.null(config$TRACK_LLR_OFFSET)) {
               config$TRACK_LLR_OFFSET = 0
+            }
             tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
             tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
             tmp_x_prev = tmp_x[-length(tmp_x)]
             tmp_x_next = tmp_x[-1]
             need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
-...
             points(tmp_x, tmp_y, cex=4, pch=16, col="white")
             points(tmp_x, tmp_y, cex=4, lwd=2)
             text(tmp_x, tmp_y, 1:nrow(tf_nucs))
             if (is.null(config$LEGEND_LOD_POS)) {
             if (is.null(config$LEGEND_LLR_POS)) {
               pos = 2
             } else {
               pos = config$LEGEND_LOD_POS
               pos = config$LEGEND_LLR_POS
+            }
             col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
             text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr)

Formats disponibles : Unified diff

LBMC » NucleoMiner

Révision e5603c3f src/R/nucleominer.R