Revision 780f632a: src/R/nucleominer.R

--- a/src/R/nucleominer.R
+++ b/src/R/nucleominer.R
@@ -2,7 +2,9 @@
 marker,
 combi,
 form,
-cur_index
+cur_index,
+all_samples, ##<< A table that describes all our samples.
+config=NULL ##<< GLOBAL config variable.
 ) {
 	print(paste("Counting reads for ", form, " CUR ", cur_index, " in ", marker, " for [", combi[1], ",", combi[2], "].", sep=""))
 	nucs = list()
@@ -39,10 +41,14 @@
     	manip = marker
     	{
     		for (strain in combi) {
-    			for(sample in all_samples[all_samples$marker == manip & all_samples$strain == strain, ]$id) {
+    			for(sample_id in all_samples[all_samples$marker == manip & all_samples$strain == strain, ]$id) {
             # tmp_filename = paste(config$ALIGN_DIR, "/TF/splited/sample_", sample, "_chr_", res[[paste("chr", strain, sep="_")]], "_splited_sample.tab.gz",sep="")
-    				tmp_filename = paste(config$ALIGN_DIR, "/TF/sample_", sample, "_TF.txt", sep="")
-    				tmp_unnorm_reads = mread.table(tmp_filename, stringsAsFactors=FALSE)
+            if ("tf_input" %in% names(all_samples)) {
+              sample_inputs_filename = all_samples$tf_input[all_samples$id==sample_id]
+            } else {
+              sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", sample_id, "_TF.txt", sep="")
+            }
+    				tmp_unnorm_reads = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)
             names(tmp_unnorm_reads) = c("chr", "pos", "strand", "nb_reads")
     				orig_cur = list(name = "foo", 
                                 begin = res[[paste("lower_bound", strain, sep="_")]], 
@@ -51,7 +57,7 @@
     														strain_ref = strain)
     				orig_cur$length = orig_cur$end - orig_cur$begin + 1
     				tmp_nuc_reads = filter_tf_inputs(tmp_unnorm_reads, orig_cur$chr, orig_cur$begin, orig_cur$end, orig_cur$length)
-    				res[[paste(strain, manip, sample, sep="_")]] = sum(tmp_nuc_reads[,4])
+    				res[[paste(strain, manip, sample_id, sep="_")]] = sum(tmp_nuc_reads[,4])
     			}
     		}
     	}
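
The two hunks above switch count_reads_cur from a hard-coded ALIGN_DIR naming convention to an optional per-sample override: when the all_samples table has a tf_input column, its value is used, otherwise the old sample_<id>_TF.txt path is built. A minimal standalone sketch of that lookup (resolve_tf_input is a hypothetical name, not a function of nucleominer.R); as in the revision, only the presence of the column is tested, not individual values:

# Hypothetical helper, for illustration only.
resolve_tf_input = function(all_samples, sample_id, align_dir) {
  if ("tf_input" %in% names(all_samples)) {
    all_samples$tf_input[all_samples$id == sample_id]
  } else {
    paste(align_dir, "/TF/sample_", sample_id, "_TF.txt", sep="")
  }
}

# With the column present the override is used; without it the default applies.
samples_with_override = data.frame(id = 1, tf_input = "custom_1_TF.txt", stringsAsFactors = FALSE)
samples_without       = data.frame(id = 1, stringsAsFactors = FALSE)
resolve_tf_input(samples_with_override, 1, "align")  # "custom_1_TF.txt"
resolve_tf_input(samples_without, 1, "align")        # "align/TF/sample_1_TF.txt"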
@@ -69,10 +75,12 @@
 combi, ##<< The combinations of strains for which we want to build the count table.
 form, ##<< The nucleosome form that we want to observe: "wp" for well-positioned and "unr" for UNR.
 curs, ##<< The list of CURs
+all_samples, ##<< A table that describes all our samples.
 config=NULL ##<< GLOBAL config variable.
 ) {
+  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
 	all_res = apply(t(1:nrow(curs)), 2, function(cur_index) {
-    res = count_reads_cur(marker=marker, combi=combi, form=form, cur_index=cur_index)
+    res = count_reads_cur(marker=marker, combi=combi, form=form, cur_index=cur_index, all_samples, config)
 		return(res)
 	})
 	vec_names = names(all_res[[1]][[1]])
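
The CUR table is walked with apply(t(1:nrow(curs)), 2, ...): transposing the index vector yields a one-row matrix, so apply over its columns calls the function once per CUR index. A tiny illustration of the idiom (toy values, not from the package):

t(1:3)                                # a 1 x 3 matrix holding the indices
apply(t(1:3), 2, function(i) i * 10)  # visits 1, 2, 3 and returns c(10, 20, 30)
sapply(1:3, function(i) i * 10)       # same result with a more common idiom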
@@ -130,6 +138,7 @@
 curs, ##<< The list of CURs
 config=NULL##<< GLOBAL config variable.
 ) {
+  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
   ### build wp maps
 	glo_results = apply(t(1:1:nrow(curs)), 2, function(cur_index) {
   	dyad_shift = wp_llr = strain_maps = common_nuc_results = intra_llrs = inter_llrs = list()
@@ -1021,14 +1030,24 @@
       sample$roi$genome = mread.fasta(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]])[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
 		}
 		# Get inputs
-		sample$inputs = mread.table(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), stringsAsFactors=FALSE)
+    if ("tf_input" %in% names(all_samples)) {
+      sample_inputs_filename = all_samples$tf_input[all_samples$id==i]
+    } else {
+      sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
+    }
+		sample$inputs = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)
 		sample$total_reads = sum(sample$inputs[,4])
 		if (!only_fetch) {
 		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
 	  }
 	  # Get TF outputs for Mnase_Seq samples
 		if (sample$marker == "Mnase_Seq" & get_ouputs) {
-			sample$outputs = mread.table(paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep=""), header=TRUE, sep="\t")
+      if ("tf_output" %in% names(all_samples)) {
+        sample_outputs_filename = all_samples$tf_output[all_samples$id==i]
+      } else {
+        sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
+      }
+      sample$outputs = mread.table(sample_outputs_filename, header=TRUE, sep="\t")
 			if (!only_fetch) {
 	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
   		}
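
All of these lookups assume a common shape for all_samples: at least id, strain and marker columns (already used to select samples), plus the optional tf_input and tf_output override columns introduced in this revision. A purely illustrative example of such a table (file names and values are invented):

all_samples = data.frame(
  id        = c(1, 2, 3, 4),
  strain    = c("BY", "BY", "YJM", "YJM"),
  marker    = c("Mnase_Seq", "H3K4me1", "Mnase_Seq", "H3K4me1"),
  tf_input  = c("in/sample_1_TF.txt", "in/sample_2_TF.txt",
                "in/sample_3_TF.txt", "in/sample_4_TF.txt"),
  # tf_output is only read for Mnase_Seq samples, so NA elsewhere is harmless.
  tf_output = c("out/sample_1_all_nucs.tab", NA,
                "out/sample_3_all_nucs.tab", NA),
  stringsAsFactors = FALSE
)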
@@ -1323,31 +1342,10 @@
   legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
 }
 
-analyse_design = function(# Launch DESeq methods.
-### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
-snep_design, ##<< The design to consider.
-reads ##<< The data to consider.
-) {
-	snep_count_table = reads[, rownames(snep_design)]
-	cdsFull = newCountDataSet(snep_count_table, snep_design)
-	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
-	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
-
-	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
-	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
-
-	pvalsGLM = nbinomGLMTest( fit1, fit0 )
-	return(list(fit1, fit0, snep_design, pvalsGLM))
-}
 
 
 
-
-
-
-
-
-get_sneps = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
+analyse_count_table = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
 ### This function uses
 marker, ##<< The marker involved.
 combi, ##<< The strain combination involved.
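
The removed analyse_design documented the statistical core: DESeq normalizes the counts, fits a negative-binomial GLM with and without the manip:strain interaction, and compares the two fits; the same calls are inlined into analyse_count_table in the next hunk, together with a plain two-strain nbinomTest on the MNase-only samples. A self-contained sketch of that comparison on simulated counts (toy data and object names; the DESeq calls themselves are the ones used by the revision):

library(DESeq)  # the original DESeq package, as used by nucleominer.R

set.seed(1)
# Toy count table: 200 nucleosomes x 8 samples (2 markers x 2 strains x 2 replicates).
count_tab = matrix(rnbinom(200 * 8, mu = 50, size = 10), nrow = 200,
                   dimnames = list(paste("nuc", 1:200, sep = "_"),
                                   paste("s", 1:8, sep = "")))
snep_design = data.frame(row.names = colnames(count_tab),
                         manip  = rep(c("Mnase_Seq", "H3K4me1"), each = 4),
                         strain = rep(c("BY", "YJM"), times = 4))

# Full model (with interaction) against the reduced additive model.
cdsFull = newCountDataSet(count_tab, snep_design)
cdsFull = estimateDispersions(estimateSizeFactors(cdsFull),
                              fitType = "local", method = "pooled", sharingMode = "maximum")
fit1 = fitNbinomGLMs(cdsFull, count ~ manip * strain)
fit0 = fitNbinomGLMs(cdsFull, count ~ manip + strain)
pvalsGLM = nbinomGLMTest(fit1, fit0)  # one p-value per nucleosome for the interaction term

# MNase-only contrast between the two strains, as in the new code path.
mnase_ids = rownames(snep_design)[snep_design$manip == "Mnase_Seq"]
mnase_cds = newCountDataSet(count_tab[, mnase_ids], factor(snep_design[mnase_ids, "strain"]))
mnase_cds = estimateDispersions(estimateSizeFactors(mnase_cds),
                                fitType = "local", method = "pooled", sharingMode = "maximum")
res_mnase = nbinomTest(mnase_cds, "BY", "YJM")
head(res_mnase[, c("id", "log2FoldChange", "pval")])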
@@ -1358,29 +1356,45 @@
 ) {
   # PRETREAT
   snep_design = get_design(marker, combi, all_samples)
-  reads = get_all_reads(marker, combi, form, config=config)
-  # RUN ANALYSE
-  tmp_analyse = analyse_design(snep_design, reads)
-  # RESULTS
-	fit1 = tmp_analyse[[1]]
-	fit0 = tmp_analyse[[2]]
+  mnase_design = snep_design[snep_design$manip == "Mnase_Seq", ]
+  snep_reads = get_all_reads(marker, combi, form, config=config)
+	snep_count_table = snep_reads[, rownames(snep_design)]
+  mnase_count_table = snep_reads[, rownames(mnase_design)]
+  # RUN ANALYSE FOR SNEPS
+	cdsFull = newCountDataSet(snep_count_table, snep_design)
+	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
+	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
+	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
+	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
+	pvalsGLM = nbinomGLMTest( fit1, fit0 )
+  # RUN ANALYSE FOR MNASE
+  mnase_cdsFull = newCountDataSet(mnase_count_table, mnase_design$strain)
+  mnase_cdsFull1 = estimateDispersions(estimateSizeFactors(mnase_cdsFull), fitType="local", method="pooled", sharingMode="maximum")
+  res_mnase = nbinomTest( mnase_cdsFull1, combi[1], combi[2])
+  # GLOBAL RESULTS
+  #   SNEPS
   k = names(fit1)
-  reads[[k[2]]] = signif(fit1[[k[2]]], 5)
-  reads[[k[3]]] = signif(fit1[[k[3]]], 5)
-  reads[[k[4]]] = signif(fit1[[k[4]]], 5)
-	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
-	snep_design = tmp_analyse[[3]]
+  snep_reads[[k[2]]] = signif(fit1[[k[2]]], 5)
+  snep_reads[[k[3]]] = signif(fit1[[k[3]]], 5)
+  snep_reads[[k[4]]] = signif(fit1[[k[4]]], 5)
+	snep_reads$pvalsGLM = signif(pvalsGLM, 5)
   # print(snep_design)
-	thres = FDR(reads$pvalsGLM, FDR)
-	reads$snep_index = reads$pvalsGLM < thres
-	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
-  return(reads)
+	thres = FDR(snep_reads$pvalsGLM, FDR)
+	snep_reads$snep_index = snep_reads$pvalsGLM < thres
+  print(paste(sum(snep_reads$snep_index), " SNEPs found for ", length(snep_reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
+  #   MNASE
+  snep_reads[["mnase_l2fc"]] = signif(res_mnase$log2FoldChange, 5)
+  snep_reads[["mnase_l2fc_pval"]] = signif(res_mnase$pval, 5)
+  # write results
+	snep_filename = paste(config$RESULTS_DIR, "/" ,combi[1],"_",combi[2],"_",marker,"_", form, "_snep.tab",sep="")
+	write.table(snep_reads, file=snep_filename, row.names=FALSE, quote=FALSE)
+  return(snep_reads)
   },  ex=function(){
     marker = "H3K4me1"
     combi = c("BY", "YJM")
     form = "wpunr" # "wp" | "unr" | "wpunr"
-    # foo = get_sneps(marker, combi, form)
-    # foo = get_sneps("H4K12ac", c("BY", "RM"), "wp")
+    # foo = analyse_count_table(marker, combi, form)
+    # foo = analyse_count_table("H4K12ac", c("BY", "RM"), "wp")
 })
 
 
@@ -1680,14 +1694,14 @@
 })
 
 
-compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
+compute_curs = function (# Compute Common Uninterrupted Regions (CUR)
 ### CURs are regions that can be aligned between the genomes
 diff_allowed = 30, ##<< the maximum indel width allowed in a CUR
 min_cur_width = 4000, ##<< The minimum width of a CUR
 combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM")), ##<< list of strain pairs that will be tested for uninterrupted regions
 config = NULL ##<< GLOBAL config variable
 ) {
-
+  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
   check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
     c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
     check_homogeneity(c2c)
@@ -1750,7 +1764,7 @@
       })
       return(do.call(rbind, rois))
     }
-    # foo_orig = compute_inter_all_strain_curs2(config=config)
+    # foo_orig = compute_curs2(config=config)
     # foo = foo_orig
     STOP = FALSE
     nb_round = 0
@@ -1903,7 +1917,7 @@
   }
 
   if (length(combis)==1) {
-    return(rois[[1]])
+    curs = rois[[1]]
   } else {
     reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
     reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
@@ -1913,8 +1927,12 @@
     reducted_rois = translate_curs(reducted_2_rois, "BY", config)
     reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
     squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
-    return(squeezed_rois)
+    curs = squeezed_rois
   }
+
+  cur_filename = paste(config$RESULTS_DIR, "/all_curs.tab", sep="")
+  write.table(curs, file=cur_filename, row.names=FALSE, quote=FALSE)
+  return(curs)
 }
 
 intersect_region = function(# Returns the intersection of 2 lists of regions.
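
compute_curs now persists its result before returning it. Because write.table is called with the default separator and col.names, the file can be read back with read.table(header=TRUE), provided no CUR field contains whitespace (an assumption about the table contents):

# Reload the CURs written by compute_curs; assumes config$RESULTS_DIR is set
# as elsewhere in the file and that no field contains spaces.
cur_filename = paste(config$RESULTS_DIR, "/all_curs.tab", sep="")
curs = read.table(cur_filename, header=TRUE, stringsAsFactors=FALSE)
head(curs)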
@@ -1974,6 +1992,11 @@
   			sample$roi$genome = mread.fasta(fasta_ref_filename, )[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
   		}
   		# Get inputs
+      if ("tf_input" %in% names(all_samples)) {
+        sample_inputs_filename = all_samples$tf_input[all_samples$id==i]
+      } else {
+        sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
+      }
   		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
   		sample$inputs = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)
   		sample$total_reads = sum(sample$inputs[,4])
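
Unlike the other call sites, the old unconditional assignment survives here as a context line immediately after the new conditional block, so sample_inputs_filename is overwritten and the tf_input override has no effect in this function. Assuming the override is meant to apply here too (it may be deliberate), the corrected sequence would mirror the earlier hunks:

# Sketch of the intended sequence: the unconditional paste() line is dropped.
if ("tf_input" %in% names(all_samples)) {
  sample_inputs_filename = all_samples$tf_input[all_samples$id==i]
} else {
  sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
}
sample$inputs = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)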
@@ -1982,7 +2005,11 @@
   	  }
   	  # Get TF outputs for Mnase_Seq samples
   		if (sample$marker == "Mnase_Seq" & get_ouputs) {
-  			sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
+        if ("tf_output" %in% names(all_samples)) {
+          sample_outputs_filename = all_samples$tf_output[all_samples$id==i]
+        } else {
+          sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")
+        }
   			sample$outputs = mread.table(sample_outputs_filename, header=TRUE, sep="\t")
   			if (!only_fetch) {
   	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
