/src/R/nucleominer.R - Diff - NucleoMiner - Forge du Centre Blaise Pascal

Révision 1d833b97 src/R/nucleominer.R

     wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
     corr_thres=0.5, ##<< Correlation threshold.
     lod_thres=-100, ##<< LOD cut off.
     config=NULL, ##<< GLOBAL config variable
     ... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
     ) {
     	if (length(replicates) < 2) {
-...
     	if (is.null(wp_nucs_strain_ref2)) {
     	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
+      }
       # foo <<- wp_nucs_strain_ref1
       # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
       # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
     	# dealing with matching_nas
     	lws = c()
     	ups = c()
-...
     			# Filtering on Well Positionned
     			if (nuc_strain_ref1$wp) {
     				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
             # print(roi_strain_ref1)
     				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi)
     				if (!is.null(roi_strain_ref2)){
     				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi, config)
             if (!is.null(roi_strain_ref2)){
     					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
     					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
     					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
-...
+    	}
     ### Returns a list of clusterized nucleosomes, and all computed lod scores.
     }, ex=function(){
         # Define new translate_roi function...
         translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
           return(roi)
+        }
         # Binding it by uncomment follwing lines.
         unlockBinding("translate_roi", as.environment("package:nucleominer"))
         unlockBinding("translate_roi", getNamespace("nucleominer"))
         assign("translate_roi", translate_roi, "package:nucleominer")
         assign("translate_roi", translate_roi, getNamespace("nucleominer"))
         lockBinding("translate_roi", getNamespace("nucleominer"))
         lockBinding("translate_roi", as.environment("package:nucleominer"))
     	# Dealing with a region of interest
     	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
     	roi2 = translate_roi(roi, roi$strain_ref1)
-...
     	print(align_inter_strain_nucs(replicates))
     })
     translate_roi = structure(function(# Translate coords of a genome region.
     ### This function is used in the examples, usually you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
     roi, ##<< Original genome region of interest.
     strain2, ##<< The strain in which you want the genome region of interest.
     big_roi=NULL ##<< A larger region than roi, used to filter c2c if it is needed. It is not used by this minimal implementation.
     ) {
     ### This function translates a genome region of interest from a strain coord system to another. This is a minimal function that will be called by \emph{align_inter_strain_nucs} and its example, you need to overwrite it by your own function.
       strain1 = roi$strain_ref
       # The condition is scalar, so the short-circuit operator is used.
       # The roi is returned untouched when no translation is needed, or when the
       # target strain is the example strain "strain_ex2".
       if (strain1 == strain2 || strain2 == "strain_ex2") {
         return(roi)
       }
       # Any other pair of strains requires a real translation function.
       stop("ERROR, you need to overwrite this function with your own one to convert strain coords. To bind the new function, have a look in the translate_roi example.")
     ### The translated genome region of interest.
     }, ex=function(){
       # Define new translate_roi function...
       translate_roi = function(roi, strain2) {
         strain1 = roi$strain_ref
         if (strain1 == strain2) {
           return(roi)
         } else {
           stop("Here is my new translate_roi function...")
         }
       }
       # Binding it by uncommenting the following lines.
       # unlockBinding("translate_roi", as.environment("package:nucleominer"))
       # unlockBinding("translate_roi", getNamespace("nucleominer"))
       # assign("translate_roi", translate_roi, "package:nucleominer")
       # assign("translate_roi", translate_roi, getNamespace("nucleominer"))
       # lockBinding("translate_roi", getNamespace("nucleominer"))
       # lockBinding("translate_roi", as.environment("package:nucleominer"))
     })
-...
     strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
     roi, ##<< Region of interest.
     all_samples, ##<< Global list of samples.
     config, ##<< GLOBAL config variable
     config=NULL, ##<< GLOBAL config variable
     only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
     get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
     get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
-...
       samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
     	for (i in samples_ids) {
     		sample = as.list(all_samples[all_samples$id==i,])
         sample$roi = translate_roi(roi, sample$strain)
         sample$roi = translate_roi(roi, sample$strain, config)
     		if (get_genome) {
     			# Get Genome
           spl = function(l) {
           	ret = list()
           	for (name in names(l)) {
           		ret[[as.character(l[[name]])]] = name
+          	}
           	ret
+          }
           sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[spl(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
           sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
+    		}
     		# Get inputs
     		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
-...
     regions, ##<< Regions to be translated.
     combi, ##<< Combination of strains.
     roi_index, ##<< The region of interest index.
     config=NULL, ##<< GLOBAL config variable
     roi ##<< The region of interest.
     ) {
       tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
         tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
         big_roi =  translate_roi(roi, tmp_regions_ref2$strain_ref)
         big_roi =  translate_roi(roi, tmp_regions_ref2$strain_ref, config)
         tmp_min = min(big_roi$begin, big_roi$end)
         tmp_max = max(big_roi$begin, big_roi$end)
         big_roi$begin = tmp_min
         big_roi$end = tmp_max
         trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], big_roi)
         trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config, big_roi)
         data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
         })
       tr_regions = do.call("rbind", tr_regions)
-...
     ### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
     tmp_fuzzy_nucs, ##<< the regiuons to be croped.
     roi, ##<< The region of interest.
     strain ##<< The strain to consider.
     strain, ##<< The strain to consider.
     config=NULL ##<< GLOBAL config variable
     ) {
       tr_roi = translate_roi(roi, strain)
       tr_roi = translate_roi(roi, strain, config)
       tr_roi_begin = min(tr_roi$begin, tr_roi$end)
       tr_roi_end = max(tr_roi$begin, tr_roi$end)
       if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
-...
     roi, ##<< The region of interest.
     roi_index, ##<< The region of interest index.
     strain_maps, ##<< Nuc maps.
     common_nuc_results ##<< Common wp nuc maps
     common_nuc_results, ##<< Common wp nuc maps
     config=NULL ##<< GLOBAL config variable
     ) {
       print(roi_index)
       PLOT = FALSE
-...
       print(paste("Dealing with fuzzy from", combi[1]))
       tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
       tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1])
       tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
       if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
       agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)
       if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
-...
       tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
       if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
       agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)
       agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2])
       agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
       tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi)
       tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2])
       tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
       # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
       if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
         lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
-...
     ROM2ARAB = function(# Roman to Arabic pair list.
     ### Util to convert Roman to Arabic: builds the pair list that maps the Roman numerals "I" to "XX" to the numbers 1 to 20.
     ){
       roman_numerals = c(
         "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
         "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX"
       )
       # as.numeric keeps the values as doubles (1, 2, ...), not integers.
       arabic = as.list(as.numeric(seq_along(roman_numerals)))
       names(arabic) = roman_numerals
       arabic
     ### A named list whose names are Roman numerals and whose values are the corresponding numbers.
     }
     switch_pairlist = structure(function(# Switch a pairlist
     ### Take a pairlist key:value and return the switched pairlist value:key.
     l ##<< The pairlist to switch.
     ) {
       switched = list()
       # Each value (converted to a string) becomes a key, each key becomes a value.
       # When several keys share the same value, the last key wins.
       for (key in names(l)) {
         switched[[as.character(l[[key]])]] = key
       }
       switched
     ### The switched pairlist.
     }, ex=function(){
       l = list(key1 = "value1", key2 = "value2")
       print(switch_pairlist(l))
     })
     ARAB2ROM = function(# Arabic to Roman pair list.
     ### Util to convert Arabic to Roman: the Roman to Arabic pair list with keys and values switched.
     ){
       roman_to_arabic = ROM2ARAB()
       switch_pairlist(roman_to_arabic)
     ### A named list whose names are the numbers (as strings) and whose values are the Roman numerals.
     }
     # translate_roi = structure(function(# Translate coords of a genome region.
     # ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
     # roi, ##<< Original genome region of interest.
     # strain2, ##<< The strain in wich you want the genome region of interest.
     # big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
     # ) {
     # ### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion.
     #   strain1 = roi$strain_ref
     #   if (strain1 == strain2 | strain2 == "strain_ex2") {
     #     return(roi)
     #   } else {
     #     stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a
     #     look in the translate_roi example.")
     #   }
     # ### The translated genome region of interest.
     # }, ex=function(){
     #   # Define new translate_roi function...
     #   translate_roi = function(roi, strain2) {
     #     strain1 = roi$strain_ref
     #     if (strain1 == strain2) {
     #       return(roi)
     #     } else {
     #       stop("Here is my new translate_roi function...")
     #     }
     #   }
     #   # Binding it by uncomment follwing lines.
     #   # unlockBinding("translate_roi", as.environment("package:nm"))
     #   # unlockBinding("translate_roi", getNamespace("nm"))
     #   # assign("translate_roi", translate_roi, "package:nm")
     #   # assign("translate_roi", translate_roi, getNamespace("nm"))
     #   # lockBinding("translate_roi", getNamespace("nm"))
     #   # lockBinding("translate_roi", as.environment("package:nm"))
     # })
+    #
+    #
     translate_roi = structure(function(# Translate coords of a genome region.
     ### Translate a genome region of interest from the coord system of its own strain (\emph{roi$strain_ref}) to the coord system of \emph{strain2}, using the c2c table referenced in \emph{config$C2C_FILES}. You can define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
     roi, ##<< Original genome region of interest.
     strain2, ##<< The strain in which you want the genome region of interest.
     config=NULL, ##<< GLOBAL config variable
     big_roi=NULL ##<< A larger region than roi, used to filter c2c if it is needed. It must be in the same strain_ref as roi.
     ) {
       # NOTE(review): config comes before big_roi in the signature; callers
       # passing both positionally must respect this order (or name them).
       strain1 = roi$strain_ref

       # Signed length of a region: positive when begin <= end, negative when the
       # region is reversed, and 1 for a single-base region (begin == end).
       roi_length = function(begin, end) {
         if (sign(end - begin) == 0) {
           return(1)
         }
         end - begin + sign(end - begin) * 1
       }

       # Nothing to translate, only the length is (re)computed.
       if (strain1 == strain2) {
         roi$length = roi_length(roi$begin, roi$end)
         return(roi)
       }

       # For these combinations the c2c file of the opposite direction
       # (strain2-strain1) is loaded and its columns are swapped below.
       reverse = (strain1=="RM" && strain2=="BY") || strain1=="YJM"

       # Convert a chr index of strain1 into the chr name used in the c2c table.
       to_c2c_chr = function(chr) {
         if (strain1 == "BY") {
           paste("chr", ARAB2ROM()[[chr]], sep="")
         } else if (strain1 == "RM") {
           paste("supercontig_1.", chr, sep="")
         } else if (strain1 == "YJM") {
           switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
         } else {
           stop(paste("ERROR, unknown strain_ref for the region of interest:", strain1))
         }
       }

       # Launch c2c file
       if (reverse) {
         c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
       } else {
         c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
       }
       c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
       # filtering it
       c2c = c2c[c2c$V6=="-",]
       # Reverse: swap the columns of both strains so that V1, V2, V3 always
       # describe strain1 and V7, V9, V10 always describe strain2.
       if (reverse) {
         for (pair in list(c("V1", "V7"), c("V2", "V9"), c("V3", "V10"))) {
           tmp_col = c2c[[pair[1]]]
           c2c[[pair[1]]] = c2c[[pair[2]]]
           c2c[[pair[2]]] = tmp_col
         }
       }

       # Restrict c2c to big_roi
       if (!is.null(big_roi)) {
         if (roi$strain_ref != big_roi$strain_ref) {
           stop("ERROR, big_roi and roi not in the same strain_ref")
         }
         big_chro_1 = to_c2c_chr(big_roi$chr)
         big_begin_1 = big_roi$begin
         big_end_1 = big_roi$end
         c2c = c2c[c2c$V1==big_chro_1,]
         # Lines ending before big_roi or starting after it are collapsed to a
         # zero width interval, then dropped. The second test must be done after
         # the first assignment, since it reads the updated V2.
         if (length(c2c[c2c$V3<big_begin_1, 1]) > 0) {
           c2c[c2c$V3 < big_begin_1, c("V2", "V3")] = big_begin_1
         }
         if (length(c2c[c2c$V2>big_end_1, 1]) > 0) {
           c2c[c2c$V2 > big_end_1, c("V2", "V3")] = big_end_1
         }
         c2c = c2c[c2c$V2 - c2c$V3 != 0,]
       }

       # Convert initial roi$chr into c2c format
       chro_1 = to_c2c_chr(roi$chr)
       begin_1 = roi$begin
       end_1 = roi$end

       # The c2c lines of chro_1 that contain the position pos.
       covering_lines = function(pos) {
         if (reverse) {
           c2c[c2c$V1==chro_1 & ((c2c$V3>=pos & c2c$V2<=pos & c2c$V8==1) | (c2c$V2>=pos & c2c$V3<=pos & c2c$V8==-1)),]
         } else {
           c2c[c2c$V1==chro_1 & c2c$V3>=pos & c2c$V2<=pos,]
         }
       }

       # Computing equivalent strain_2 alignment coordinates
       tmptransfostart = covering_lines(begin_1)
       tmptransfostop = covering_lines(end_1)

       # Never happend conditions ...
       if (length(tmptransfostart$V8) == 0) {
         # begin_1 is between two lines: shift begin_1 to the start of 2nd line.
         tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
         begin_1 = sort(tmp_c2c$V2)[1]
         tmptransfostart = covering_lines(begin_1)
         if (length(tmptransfostart$V8) == 0) {
           if (!is.null(big_roi)) {
             # The roi can not be translated inside big_roi.
             return(NULL)
           }
           print(tmptransfostart)
           print(tmptransfostop)
           stop("Never happend condition 1.")
         }
       }
       if (length(tmptransfostop$V8) == 0) {
         # end_1 is between two lines: shift end_1 to the end of 2nd line.
         tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
         end_1 = sort(tmp_c2c$V3)[length(tmp_c2c$V2)]
         tmptransfostop = covering_lines(end_1)
         if (length(tmptransfostop$V8) == 0) {
           if (!is.null(big_roi)) {
             # The roi can not be translated inside big_roi.
             return(NULL)
           }
           print(c2c[c2c$V1==chro_1,])
           print(tmptransfostart)
           print(tmptransfostop)
           stop("Never happend condition 2.")
         }
       }
       if (length(tmptransfostart$V8) != 1) {
         # Many candidate lines: keep the one starting exactly at begin_1.
         tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
         if (length(tmptransfostart$V8) != 1) {
           print(tmptransfostart)
           print(tmptransfostop)
           stop("Never happend condition 3.")
         }
       }
       if (length(tmptransfostop$V8) != 1) {
         # Many candidate lines: keep the one ending exactly at end_1.
         tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
         if (length(tmptransfostop$V8) != 1) {
           print(tmptransfostart)
           print(tmptransfostop)
           stop("Never happend condition 4.")
         }
       }
       if (tmptransfostart$V7 != tmptransfostop$V7) {
         print(tmptransfostart)
         print(tmptransfostop)
         stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
       }

       # Deal with strand
       if (tmptransfostart$V8 == 1) {
         begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
         end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
       } else {
         begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
         end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
       }

       # Build returned roi
       roi$strain_ref = strain2
       if (roi$strain_ref == "BY") {
         # BY chr names look like "chrIV": drop the "chr" prefix.
         roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
       } else {
         roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
       }
       roi$begin = begin_2
       roi$end = end_2
       roi$length = roi_length(roi$begin, roi$end)
       return(roi)
     ### The translated genome region of interest, with its strain_ref, chr, begin, end and length updated. NULL if big_roi is given and the region can not be translated inside it.
     }, ex=function(){
       # Define new translate_roi function...
       translate_roi = function(roi, strain2, config=NULL, big_roi=NULL) {
         strain1 = roi$strain_ref
         if (strain1 == strain2) {
           return(roi)
         } else {
           stop("Here is my new translate_roi function...")
         }
       }
       # Binding it by uncommenting the following lines.
       # unlockBinding("translate_roi", as.environment("package:nucleominer"))
       # unlockBinding("translate_roi", getNamespace("nucleominer"))
       # assign("translate_roi", translate_roi, "package:nucleominer")
       # assign("translate_roi", translate_roi, getNamespace("nucleominer"))
       # lockBinding("translate_roi", getNamespace("nucleominer"))
       # lockBinding("translate_roi", as.environment("package:nucleominer"))
     })
     compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
     ### CURs are regions that can be aligned between the genomes
     diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
     min_cur_width = 200, ##<< The minimum width of a CUR
     config=NULL, ##<< GLOBAL config variable
     plot = FALSE ##<< Plot CURs or not
     ) {
       get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
       	c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
       	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
         # Filtering unagapped
         c2c = c2c[c2c$V6=="-",]
         # filtering some things (chr...)
         # c2c = c2c[c2c$V1 == "chrIV",]
         diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
         diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
       	# Plot diffs to define a threshold (diff_allowed)
       	# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
         # Filtering
       	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
       	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
         rois = NULL
       	for(i in 1:length(indexes_start)) {
       		start = indexes_start[i]
       		stop = indexes_stop[i]
       		sub_c2c = c2c[start:stop,]
       		if (strain1 == "BY") {
       			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
       		} else {
       			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
+      		}
       		roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
       		roi[["length"]] = roi$end - roi$begin
       		if (roi$length >= min_cur_width) {
             rois = dfadd(rois,roi)
+      	  }
       		if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
       			print("*************** ERROR, non homogenous region! ********************")
+      		}
       		# print(i)
       		# print(roi)
       		# print(sub_c2c)
       		# print("________________________________________________________________")
+      	}
       	if (plot) {
       		print(paste(length(indexes_stop), "area of interest."))
       	  # Plot rois
       	  fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[[strain1]])
       	  genome = get_content(fasta_ref$filename, "fasta")
       		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
       		for (name in names(genome)) {
       			if (strain1 == "BY") {
       				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
       			} else {
       				chr_ref = name
+      			}
       			y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
       			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
       			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)
+      		}
       	  col=1
       	  for (roi_index in 1:length(rois$chr)) {
       			roi = rois[roi_index,]
       			y_lev = as.integer(roi$chr) + 0.3
       			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
       			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
       	  	col = col + 1
+      	  }
+      	}
       	return(rois)
+      }
     	rois = NULL
     	rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
     	rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
     	for (roi_1_index in 1:length(rois_BY_RM[,1])) {
     		roi_1 = rois_BY_RM[roi_1_index,]
     		roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;
     		# print(length(roi_2_candidates[,1]))
     		if (length(roi_2_candidates[,1]) > 0) {
     			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
     				roi_2 = roi_2_candidates[roi_2_index,]
     				roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
     				roi[["length"]] = roi$end - roi$begin + 1
     				if (roi$length >= min_cur_width) {
     					# if (length(rois[,1]) == 153) {
     					# 	print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
     					# 	print(roi_1)
     					# 	print(roi_2)
     					# 	print(roi)
     					# }
     			    rois = dfadd(rois,roi)
+    			  }
+    			}
+    		}
+    	}
     	print(length(rois[,1]))
     	print(sum(rois$length))
     	rois_1st_round = rois
     	rois_2nd_round = NULL
     	rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
     	for (roi_1_index in 1:length(rois_1st_round[,1])) {
     		roi_1 = rois_1st_round[roi_1_index,]
     		translated_roi_1 = translate_roi(roi_1, "RM", config)
     		t_b = min(translated_roi_1$begin, translated_roi_1$end)
     		t_e = max(translated_roi_1$begin, translated_roi_1$end)
     		roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;
     		if (length(roi_2_candidates[,1]) > 0) {
     			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
     				roi_2 = roi_2_candidates[roi_2_index,]
     				roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
     				roi[["length"]] = roi$end - roi$begin + 1
     				if (roi$length >= min_cur_width) {
     			    rois_2nd_round = dfadd(rois_2nd_round,roi)
+    			  }
+    			}
+    		}
+    	}
     	print(length(rois_2nd_round[,1]))
     	print(sum(rois_2nd_round$length))
     	rois = rois_2nd_round
     	rois_translator_round = list()
     	for (roi_index in 1:length(rois[,1])) {
     		roi = rois[roi_index,]
     		BY_roi  = translate_roi(roi, "BY", config)
     		tmp_BY_roi = BY_roi
     		BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
     		BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
     		BY_roi$length = abs(BY_roi$length)
     		rois_translator_round = dfadd(rois_translator_round, BY_roi)
+    	}
     	rois = rois_translator_round
     	rois_3rd_round = NULL
     	for (roi_index in 1:length(rois[,1])) {
     	# for (roi_index in 1:2) {
     		current_roi = rois[roi_index,]
     		# print(roi_index)
     	  to_be_check_rois = dfadd(NULL, current_roi)
     		NEED_RERUN = TRUE
     		while (NEED_RERUN) {
     			# print("RERUN"),
     			NEED_RERUN = FALSE
     			to_be_check_again = NULL
     			for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
     				# print(to_be_check_rois)
     				to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
     		    combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
     			  for (combi in combis) {
     					# print(combi)
     			    strain1 = combi[1]
     		      strain2 = combi[2]
     					trans_roi = translate_roi(to_be_check_roi, strain1, config)
     					lower_bound=min(trans_roi$begin, trans_roi$end)
     					upper_bound=max(trans_roi$begin, trans_roi$end)
               check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
                 reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
               	if (strain1 == strain2) {
               		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
               		return(roi)
+              	}
               	# Launch c2c file
               	if (reverse) {
               		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
               	} else {
               		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
+              	}
               	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
               	# filtering it
                 c2c = c2c[c2c$V6=="-",]
               	# Reverse
               	if (reverse) {
               		tmp_col = c2c$V1
               		c2c$V1 = c2c$V7
               		c2c$V7 = tmp_col
               		tmp_col = c2c$V2
               		c2c$V2 = c2c$V9
               		c2c$V9 = tmp_col
               		tmp_col = c2c$V3
               		c2c$V3 = c2c$V10
               		c2c$V10 = tmp_col
+              	}
               	if (strain1 == "BY") {
               		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
               	} else if (strain1 == "RM") {
               	  chro_1 = paste("supercontig_1.",chr,sep="")
               	} else if (strain1 == "YJM") {
               	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
+              	}
               	# print(chro_1)
               	if (!is.null(lower_bound) & !is.null(upper_bound)) {
                   if (reverse) {
               	  	tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
               		} else {
               			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
+              		}
               	} else {
                 	tmp_c2c = c2c[c2c$V1 == chr,]
+              	}
               	if (length(tmp_c2c[,1]) > 1) {
               		pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
               			# print(paste(i, "/", length(tmp_c2c[,1])))
               			apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
               				l1 = tmp_c2c[i,]
               				b1 = min(l1$V2, l1$V3)
               				e1 = max(l1$V2, l1$V3)
               				l2 = tmp_c2c[j,]
               				b2 = min(l2$V2, l2$V3)
               				e2 = max(l2$V2, l2$V3)
               				if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
               					print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))
               					pb = list(strain1, strain2, chr, b1, e1, b2, e2)
               					pb
               				} else {
               					NULL
+              				}
               			})
               		})
               		return(pbs)
+              	}
               }, ex=function(){
               	source("src/nucleo_miner/yeast_strain_conversion.R");
               	pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
               	pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
               	pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
               	pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
               	pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
               	pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
               })
     					res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound)
     					if (!is.null(res)) {
     						df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)
     						interval = df_res[1,]
     						inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
     						inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
     						# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))
     						new_roi1 = trans_roi
     						new_roi2 = trans_roi
     						new_roi1$begin = lower_bound
     						new_roi1$end = inter_min - 1
     						new_roi1$length = new_roi1$end - new_roi1$begin + 1
     						new_roi2$begin = inter_max + 1
     						new_roi2$end = upper_bound
     						new_roi2$length = new_roi2$end - new_roi2$begin + 1
     						if (new_roi1$length > min_cur_width) {
     							BY_roi  = translate_roi(new_roi1, "BY", config)
     							tmp_BY_roi = BY_roi
     							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
     							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
     							BY_roi$length = abs(BY_roi$length)
     							to_be_check_again = dfadd(to_be_check_again, BY_roi)
+    						}
     						if (new_roi2$length > min_cur_width) {
     							BY_roi  = translate_roi(new_roi2, "BY", config)
     							tmp_BY_roi = BY_roi
     							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
     							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
     							BY_roi$length = abs(BY_roi$length)
     							to_be_check_again = dfadd(to_be_check_again, BY_roi)
+    						}
     						if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
     							for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
     								to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])
+    							}
+    						}
     						NEED_RERUN = TRUE
     						break
+    					}
+    				}
     				if (NEED_RERUN) {
     					to_be_check_rois = to_be_check_again
     					break
+    				}
+    			}
+    		}
     		checked_rois = to_be_check_rois
     		for (checked_roi_index in 1:length(checked_rois[,1])) {
     			rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])
+    		}
+    	}
     	print(length(rois_3rd_round[,1]))
     	print(sum(rois_3rd_round$length))
     	rois = rois_3rd_round
     	if (plot) {
     		print(paste(length(rois$chr), "area of interest."))
     	  # Plot rois
     	  fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[["BY"]])
     	  genome = get_content(fasta_ref$filename, "fasta")
     		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
     		for (name in names(genome)) {
     			if (TRUE) {
     				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
     			} else {
     				chr_ref = name
+    			}
     			y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
     			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
     			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)
+    		}
     	  col=1
     	  for (roi_index in 1:length(rois$chr)) {
     			roi = rois[roi_index,]
     			y_lev = as.integer(roi$chr) + 0.3
     			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
     			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
     	  	col = col + 1
+    	  }
+    	}
     	return (rois)
+    }
-...
     plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
     aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
     aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
     height = 10 ##<< Number of reads per million reads for each sample; graphical parameter for the y axis.
     height = 10, ##<< Number of reads per million reads for each sample; graphical parameter for the y axis.
     config=NULL ##<< GLOBAL config variable
     ){
       returned_list = list()
       # Computing global display parameters
-...
     	if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
     		if (is.null(aligned_inter_strain_nucs)) {
     			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]])[[1]]
     			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
+    		}
     		if (plot_common_nucs) {
           #Plot common wp nucs

Formats disponibles : Unified diff

LBMC » NucleoMiner

Révision 1d833b97 src/R/nucleominer.R