Statistiques
| Branche: | Révision :

root / src / R / nucleominer.R @ 780f632a

Historique | Voir | Annoter | Télécharger (104,05 ko)

1 b26ac9e7 Florent Chuffart
count_reads_cur = memoise(function(
2 b26ac9e7 Florent Chuffart
marker,
3 b26ac9e7 Florent Chuffart
combi,
4 b26ac9e7 Florent Chuffart
form,
5 780f632a Florent Chuffart
cur_index,
6 780f632a Florent Chuffart
all_samples, ##<< A table that describe all our samples.
7 780f632a Florent Chuffart
config=NULL ##<< GLOBAL config variable.
8 b26ac9e7 Florent Chuffart
) {
9 b26ac9e7 Florent Chuffart
	print(paste("Counting reads for ", form, " CUR ", cur_index, " in ", marker, " for [", combi[1], ",", combi[2], "].", sep=""))
10 b26ac9e7 Florent Chuffart
	nucs = list()
11 b26ac9e7 Florent Chuffart
	for (strain in combi) {
12 b26ac9e7 Florent Chuffart
    if (form == "unr") {
13 b26ac9e7 Florent Chuffart
  	  nucs[[strain]] = mread.table(paste(config$RESULTS_DIR, "/", strain, "_in_", combi[1], "_", combi[2], "_unr.tab", sep=""), header=TRUE)
14 b26ac9e7 Florent Chuffart
    } else {
15 b26ac9e7 Florent Chuffart
	    nucs[[strain]] = mread.table(paste(config$RESULTS_DIR, "/", strain, "_wp.tab", sep=""), header=TRUE)
16 b26ac9e7 Florent Chuffart
    }
17 b26ac9e7 Florent Chuffart
	}
18 b26ac9e7 Florent Chuffart
	common_nucs = mread.table(paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_common_", form, ".tab", sep=""), header=TRUE)    
19 b26ac9e7 Florent Chuffart
  if (form == "wp") {
20 b26ac9e7 Florent Chuffart
    common_nucs = common_nucs[common_nucs$llr_score < 100, ]
21 b26ac9e7 Florent Chuffart
  }
22 b26ac9e7 Florent Chuffart
	common_nucs = common_nucs[ common_nucs$cur_index == cur_index, ]
23 b26ac9e7 Florent Chuffart
	if ( length(common_nucs[,1]) != 0) {
24 b26ac9e7 Florent Chuffart
		all_res = apply(t(1:length(common_nucs[,1])), 2, function(i){
25 b26ac9e7 Florent Chuffart
    	if (i %% 10 == 1) {
26 b26ac9e7 Florent Chuffart
    	  print(paste(i, "/", length(common_nucs[,1]), sep=""))	
27 b26ac9e7 Florent Chuffart
      }
28 b26ac9e7 Florent Chuffart
    	common_nuc = common_nucs[i,]		
29 b26ac9e7 Florent Chuffart
    	res= list()
30 b26ac9e7 Florent Chuffart
      # Get nuc infos
31 b26ac9e7 Florent Chuffart
      for (strain in combi) {
32 b26ac9e7 Florent Chuffart
    	  tmp_nuc_details =  nucs[[strain]][nucs[[strain]]$cur_index == common_nuc$cur_index & nucs[[strain]]$index_nuc == common_nuc[[paste("index_nuc",strain, sep="_")]], ]
33 b26ac9e7 Florent Chuffart
        # print(tmp_nuc_details)
34 b26ac9e7 Florent Chuffart
    		res[[paste("chr", strain, sep="_")]] = tmp_nuc_details$chr
35 b26ac9e7 Florent Chuffart
    		res[[paste("lower_bound", strain, sep="_")]] = tmp_nuc_details$lower_bound
36 b26ac9e7 Florent Chuffart
    		res[[paste("upper_bound", strain, sep="_")]] = tmp_nuc_details$upper_bound
37 b26ac9e7 Florent Chuffart
    		res[[paste("index_nuc", strain, sep="_")]] = common_nuc[[paste("index_nuc",strain, sep="_")]]
38 b26ac9e7 Florent Chuffart
      }	
39 b26ac9e7 Florent Chuffart
    	res$cur_index =	common_nuc$cur_index
40 b26ac9e7 Florent Chuffart
      # Get normalized reads
41 b26ac9e7 Florent Chuffart
    	manip = marker
42 b26ac9e7 Florent Chuffart
    	{			
43 b26ac9e7 Florent Chuffart
    		for (strain in combi) {				
44 780f632a Florent Chuffart
    			for(sample_id in all_samples[all_samples$marker == manip & all_samples$strain == strain, ]$id) {
45 b26ac9e7 Florent Chuffart
            # tmp_filename = paste(config$ALIGN_DIR, "/TF/splited/sample_", sample, "_chr_", res[[paste("chr", strain, sep="_")]], "_splited_sample.tab.gz",sep="")
46 780f632a Florent Chuffart
            if ("tf_input" %in% names(all_samples)) {
47 780f632a Florent Chuffart
              sample_inputs_filename = all_samples$tf_input[all_samples$id==sample_id]
48 780f632a Florent Chuffart
            } else {
49 780f632a Florent Chuffart
              sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", sample_id, "_TF.txt", sep="")
50 780f632a Florent Chuffart
            }
51 780f632a Florent Chuffart
    				tmp_unnorm_reads = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)					
52 b26ac9e7 Florent Chuffart
            names(tmp_unnorm_reads) = c("chr", "pos", "strand", "nb_reads")
53 b26ac9e7 Florent Chuffart
    				orig_cur = list(name = "foo", 
54 b26ac9e7 Florent Chuffart
                                begin = res[[paste("lower_bound", strain, sep="_")]], 
55 b26ac9e7 Florent Chuffart
    														end = res[[paste("upper_bound", strain, sep="_")]], 
56 b26ac9e7 Florent Chuffart
    														chr = res[[paste("chr", strain, sep="_")]], 
57 b26ac9e7 Florent Chuffart
    														strain_ref = strain)
58 b26ac9e7 Florent Chuffart
    				orig_cur$length = orig_cur$end - orig_cur$begin + 1
59 b26ac9e7 Florent Chuffart
    				tmp_nuc_reads = filter_tf_inputs(tmp_unnorm_reads, orig_cur$chr, orig_cur$begin, orig_cur$end, orig_cur$length) 
60 780f632a Florent Chuffart
    				res[[paste(strain, manip, sample_id, sep="_")]] = sum(tmp_nuc_reads[,4])
61 b26ac9e7 Florent Chuffart
    			}
62 b26ac9e7 Florent Chuffart
    		}
63 b26ac9e7 Florent Chuffart
    	}
64 b26ac9e7 Florent Chuffart
    	res
65 b26ac9e7 Florent Chuffart
    })   
66 b26ac9e7 Florent Chuffart
		return(all_res)
67 b26ac9e7 Florent Chuffart
	} else {
68 b26ac9e7 Florent Chuffart
		return(NULL)
69 b26ac9e7 Florent Chuffart
	}
70 b26ac9e7 Florent Chuffart
})
71 b26ac9e7 Florent Chuffart
72 b26ac9e7 Florent Chuffart
build_count_table = function(# Build count table for a set of samples.
73 b26ac9e7 Florent Chuffart
### This function build a count table for a set of sample.
74 b26ac9e7 Florent Chuffart
marker, ##<< The marker that we want to build the count table.
75 b26ac9e7 Florent Chuffart
combi, ##<< The combinations of strains that we want to build the count table.
76 b26ac9e7 Florent Chuffart
form, ##<< The nucleosome that we want to observe: "wp" for sel;l position and "unr" for UNR.
77 b26ac9e7 Florent Chuffart
curs, ##<< The list of CURs
78 780f632a Florent Chuffart
all_samples, ##<< A table that describe all our samples.
79 b26ac9e7 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
80 b26ac9e7 Florent Chuffart
) {
81 780f632a Florent Chuffart
  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
82 b26ac9e7 Florent Chuffart
	all_res = apply(t(1:nrow(curs)), 2, function(cur_index) {
83 780f632a Florent Chuffart
    res = count_reads_cur(marker=marker, combi=combi, form=form, cur_index=cur_index, all_samples, config)
84 b26ac9e7 Florent Chuffart
		return(res)
85 b26ac9e7 Florent Chuffart
	})
86 b26ac9e7 Florent Chuffart
	vec_names = names(all_res[[1]][[1]])
87 b26ac9e7 Florent Chuffart
	all_res = data.frame(matrix(unlist(all_res), ncol= length(vec_names), byrow=TRUE))
88 b26ac9e7 Florent Chuffart
  names(all_res) = vec_names
89 b26ac9e7 Florent Chuffart
	out_filename = paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_", marker, "_", form, "_and_nbreads.tab",sep="")
90 b26ac9e7 Florent Chuffart
	write.table(all_res, file=out_filename, row.names=FALSE, quote=FALSE)				
91 b26ac9e7 Florent Chuffart
}
92 b26ac9e7 Florent Chuffart
93 b26ac9e7 Florent Chuffart
94 b26ac9e7 Florent Chuffart
95 b26ac9e7 Florent Chuffart
96 b26ac9e7 Florent Chuffart
97 b26ac9e7 Florent Chuffart
98 b26ac9e7 Florent Chuffart
extract_maps = memoise(function(# Extract maps from TemplateFilter  outputs
99 b26ac9e7 Florent Chuffart
### This function extracts from TemplateFilter outputs for a given CUR./ This is from there that aggregate_intra_strain_nucs and align_inter_strain_nucs fucntions are calles. This is an internal fucntion
100 b26ac9e7 Florent Chuffart
cur_index, ##<< The region of interest index.
101 b26ac9e7 Florent Chuffart
strains, ##<< The strains for which we want to extract intra strain information.
102 b26ac9e7 Florent Chuffart
combis, ##<< All combinations of strains for which we want to extract inter strain information.
103 b26ac9e7 Florent Chuffart
all_samples, ##<< A table that describe all our samples.
104 b26ac9e7 Florent Chuffart
config=NULL##<< GLOBAL config variable.
105 b26ac9e7 Florent Chuffart
) {
106 b26ac9e7 Florent Chuffart
  cur = as.list(curs[cur_index,])
107 b26ac9e7 Florent Chuffart
  print(paste("cur length", cur$length))
108 b26ac9e7 Florent Chuffart
109 b26ac9e7 Florent Chuffart
  mnase_replicates = list()
110 b26ac9e7 Florent Chuffart
  aggregated_intra_strain_nucs = list()
111 b26ac9e7 Florent Chuffart
  for (strain in strains) {
112 b26ac9e7 Florent Chuffart
    mnase_replicates[[strain]] = fetch_mnase_replicates(strain, as.list(curs[cur_index,]), all_samples, config)
113 b26ac9e7 Florent Chuffart
    aggregated_intra_strain_nucs[[strain]] = aggregate_intra_strain_nucs(mnase_replicates[[strain]])
114 b26ac9e7 Florent Chuffart
  }
115 b26ac9e7 Florent Chuffart
116 b26ac9e7 Florent Chuffart
  aligned_inter_strain_nucs = list()
117 b26ac9e7 Florent Chuffart
  for(combi in combis) {
118 b26ac9e7 Florent Chuffart
    if (!(length(aggregated_intra_strain_nucs[[combi[1]]][[1]]) == 0 | length(aggregated_intra_strain_nucs[[combi[2]]][[1]]) == 0 )) {
119 b26ac9e7 Florent Chuffart
      aligned_inter_strain_nucs[[paste(combi[1], combi[2], sep="_")]] = align_inter_strain_nucs(replicates = list(mnase_replicates[[combi[1]]], mnase_replicates[[combi[2]]]),
120 b26ac9e7 Florent Chuffart
                                                                                                wp_nucs_strain_ref1 = aggregated_intra_strain_nucs[[combi[1]]][[1]],
121 b26ac9e7 Florent Chuffart
                                                                                                wp_nucs_strain_ref2 = aggregated_intra_strain_nucs[[combi[2]]][[1]], config = config)
122 b26ac9e7 Florent Chuffart
    } else {
123 b26ac9e7 Florent Chuffart
      print("WARNING! no aggregated_intra_strain_nucs.")
124 b26ac9e7 Florent Chuffart
      aligned_inter_strain_nucs[[paste(combi[1], combi[2], sep="_")]] = NULL
125 b26ac9e7 Florent Chuffart
    }
126 b26ac9e7 Florent Chuffart
  }
127 b26ac9e7 Florent Chuffart
128 b26ac9e7 Florent Chuffart
  return(list(aggregated_intra_strain_nucs, aligned_inter_strain_nucs))
129 b26ac9e7 Florent Chuffart
})
130 b26ac9e7 Florent Chuffart
131 b26ac9e7 Florent Chuffart
132 b26ac9e7 Florent Chuffart
133 b26ac9e7 Florent Chuffart
build_maps = function(# Extract maps from TemplateFilter  outputs
134 b26ac9e7 Florent Chuffart
### This function extracts from TemplateFilter outputs./ This is from there that aggregate_intra_strain_nucs and align_inter_strain_nucs fucntions are calles. This fucntion write well positionned, fuzzy and both maps in the config$RESULTS_DIR directory.
135 b26ac9e7 Florent Chuffart
strains, ##<< The strains for which we want to extract intra strain information.
136 b26ac9e7 Florent Chuffart
combis, ##<< The combinations of strains for which we want to extract inter strain information.
137 b26ac9e7 Florent Chuffart
all_samples, ##<< A table that describe all our samples.
138 b26ac9e7 Florent Chuffart
curs, ##<< The list of CURs
139 b26ac9e7 Florent Chuffart
config=NULL##<< GLOBAL config variable.
140 b26ac9e7 Florent Chuffart
) {
141 780f632a Florent Chuffart
  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
142 b26ac9e7 Florent Chuffart
  ### build wp maps
143 b26ac9e7 Florent Chuffart
	glo_results = apply(t(1:1:nrow(curs)), 2, function(cur_index) {
144 b26ac9e7 Florent Chuffart
  	dyad_shift = wp_llr = strain_maps = common_nuc_results = intra_llrs = inter_llrs = list()
145 b26ac9e7 Florent Chuffart
		print(paste("Collecting maps", cur_index , "/", nrow(curs)))
146 b26ac9e7 Florent Chuffart
		extracted_maps = extract_maps(cur_index, strains, combis, all_samples, config)
147 b26ac9e7 Florent Chuffart
148 b26ac9e7 Florent Chuffart
		aggregated_intra_strain_nucs = extracted_maps[[1]]
149 b26ac9e7 Florent Chuffart
		for (strain in strains) {
150 b26ac9e7 Florent Chuffart
			print(paste("Collecting maps for", strain, "..."))
151 b26ac9e7 Florent Chuffart
			partial_strain_maps = aggregated_intra_strain_nucs[[strain]][[1]]
152 b26ac9e7 Florent Chuffart
      print(strain)
153 b26ac9e7 Florent Chuffart
      nb_tracks = sum(all_samples$strain == strain & all_samples$marker == "Mnase_Seq")
154 b26ac9e7 Florent Chuffart
      
155 b26ac9e7 Florent Chuffart
      tmp_nuc_map = flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index, nb_tracks)
156 b26ac9e7 Florent Chuffart
157 b26ac9e7 Florent Chuffart
      wp_indexes = which(tmp_nuc_map$wp == 1)
158 b26ac9e7 Florent Chuffart
      tmp_wp_llr = apply(t(wp_indexes), 2, function(wp_index){
159 b26ac9e7 Florent Chuffart
        tmp_wp = partial_strain_maps[[wp_index]]
160 b26ac9e7 Florent Chuffart
        l = lapply(tmp_wp$nucs, function(tmp_wp_nucs){
161 b26ac9e7 Florent Chuffart
          tmp_wp_nucs$original_reads
162 b26ac9e7 Florent Chuffart
        })  
163 b26ac9e7 Florent Chuffart
        res = llr_score_nvecs(l)
164 b26ac9e7 Florent Chuffart
        return(res)
165 b26ac9e7 Florent Chuffart
      })
166 b26ac9e7 Florent Chuffart
167 b26ac9e7 Florent Chuffart
      tmp_dyad_shift = apply(t(wp_indexes), 2, function(wp_index){
168 b26ac9e7 Florent Chuffart
        tmp_wp = partial_strain_maps[[wp_index]]  
169 b26ac9e7 Florent Chuffart
        c = sapply(tmp_wp$nucs, function(tmp_wp_nucs){
170 b26ac9e7 Florent Chuffart
          tmp_wp_nucs$center
171 b26ac9e7 Florent Chuffart
        })  
172 b26ac9e7 Florent Chuffart
        ds = max(c) - min(c)
173 b26ac9e7 Florent Chuffart
        return(ds)
174 b26ac9e7 Florent Chuffart
      })
175 b26ac9e7 Florent Chuffart
176 b26ac9e7 Florent Chuffart
      nb_rep = length(partial_strain_maps[[wp_indexes[1]]]$nucs)
177 b26ac9e7 Florent Chuffart
178 b26ac9e7 Florent Chuffart
      tmp_nuc_map$wp_llr = rep(NA,nrow(tmp_nuc_map))
179 b26ac9e7 Florent Chuffart
      tmp_nuc_map[which(tmp_nuc_map$wp == 1),]$wp_llr = signif(tmp_wp_llr,5)
180 b26ac9e7 Florent Chuffart
      tmp_nuc_map$wp_pval = signif(1-pchisq(2*tmp_nuc_map$wp_llr, df=(nb_rep * 2) - 2),5)
181 b26ac9e7 Florent Chuffart
      tmp_nuc_map$dyad_shift = rep(NA,nrow(tmp_nuc_map))
182 b26ac9e7 Florent Chuffart
      tmp_nuc_map[which(tmp_nuc_map$wp == 1),]$dyad_shift = tmp_dyad_shift
183 b26ac9e7 Florent Chuffart
      
184 b26ac9e7 Florent Chuffart
      strain_maps[[strain]] = tmp_nuc_map
185 b26ac9e7 Florent Chuffart
      intra_llrs[[strain]] = aggregated_intra_strain_nucs[[strain]][[2]]
186 b26ac9e7 Florent Chuffart
		}
187 b26ac9e7 Florent Chuffart
188 b26ac9e7 Florent Chuffart
    for(combi in combis) {
189 b26ac9e7 Florent Chuffart
      print(paste("Collecting results for", combi[1], combi[2], "..."))
190 b26ac9e7 Florent Chuffart
      partial_common_nuc_results = extracted_maps[[2]][[paste(combi[1], combi[2], sep="_")]][[1]]
191 b26ac9e7 Florent Chuffart
      if (length(partial_common_nuc_results) > 0) {
192 b26ac9e7 Florent Chuffart
        partial_common_nuc_results$cur_index = rep(cur_index, length(partial_common_nuc_results[,1]))
193 b26ac9e7 Florent Chuffart
        # partial_common_nuc_results$common_wp_pval = signif(1-pchisq(2*partial_common_nuc_results$llr_score, df=2),5)
194 b26ac9e7 Florent Chuffart
        common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = partial_common_nuc_results
195 b26ac9e7 Florent Chuffart
      } else {
196 b26ac9e7 Florent Chuffart
        print(paste("No partial_common_nuc_results for cur", cur_index, "ands combi", combi[1], combi[2], "." ))
197 b26ac9e7 Florent Chuffart
        common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = data.frame()
198 b26ac9e7 Florent Chuffart
      }
199 b26ac9e7 Florent Chuffart
      inter_llrs[[paste(combi[1], combi[2], sep="_")]] = extracted_maps[[2]][[paste(combi[1], combi[2], sep="_")]][[2]]
200 b26ac9e7 Florent Chuffart
    }
201 b26ac9e7 Florent Chuffart
202 b26ac9e7 Florent Chuffart
    return(list(strain_maps, common_nuc_results, intra_llrs, inter_llrs))
203 b26ac9e7 Florent Chuffart
  })
204 b26ac9e7 Florent Chuffart
205 b26ac9e7 Florent Chuffart
  glo_results = do.call("rbind", glo_results)
206 b26ac9e7 Florent Chuffart
207 b26ac9e7 Florent Chuffart
  llrs_intra = do.call("rbind", glo_results[,3])
208 b26ac9e7 Florent Chuffart
  llrs_inter = do.call("rbind", glo_results[,4])
209 b26ac9e7 Florent Chuffart
210 b26ac9e7 Florent Chuffart
  for (strain in strains) {
211 b26ac9e7 Florent Chuffart
    outpout_joint_filename = paste(config$RESULTS_DIR, "/", strain, "_llrs.tab", sep="")
212 b26ac9e7 Florent Chuffart
    write.table(unlist(llrs_intra[,strain]), file=outpout_joint_filename, row.names=FALSE, quote=FALSE)
213 b26ac9e7 Florent Chuffart
  }
214 b26ac9e7 Florent Chuffart
215 b26ac9e7 Florent Chuffart
  for(combi in combis) {
216 b26ac9e7 Florent Chuffart
    outpout_joint_filename = paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_llrs.tab", sep="")
217 b26ac9e7 Florent Chuffart
    write.table(unlist(llrs_inter[,paste(combi[1], combi[2], sep="_")]), file=outpout_joint_filename, row.names=FALSE, quote=FALSE)
218 b26ac9e7 Florent Chuffart
  }
219 b26ac9e7 Florent Chuffart
220 b26ac9e7 Florent Chuffart
  glo_strain_maps = do.call("rbind", glo_results[,1])
221 b26ac9e7 Florent Chuffart
  for (strain in strains) {
222 b26ac9e7 Florent Chuffart
      nuc_map_filename = paste(config$RESULTS_DIR, "/", strain, "_wp.tab", sep="")
223 b26ac9e7 Florent Chuffart
      write.table(do.call("rbind", glo_strain_maps[,strain]), file=nuc_map_filename, row.names=FALSE, quote=FALSE)
224 b26ac9e7 Florent Chuffart
  }
225 b26ac9e7 Florent Chuffart
226 b26ac9e7 Florent Chuffart
  glo_common_nuc_results = do.call("rbind", glo_results[,2])
227 b26ac9e7 Florent Chuffart
  for(combi in combis) {
228 b26ac9e7 Florent Chuffart
    outpout_joint_filename = paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_common_wp.tab", sep="")
229 b26ac9e7 Florent Chuffart
    write.table(do.call("rbind", glo_common_nuc_results[,paste(combi[1], combi[2], sep="_")])[, c("cur_index", paste("index_nuc", combi[1], sep="_"), paste("index_nuc", combi[2], sep="_"), "llr_score", "common_wp_pval", "diff")], file=outpout_joint_filename, row.names=FALSE, quote=FALSE)
230 b26ac9e7 Florent Chuffart
  }
231 b26ac9e7 Florent Chuffart
232 b26ac9e7 Florent Chuffart
  ### build fuzzy maps
233 b26ac9e7 Florent Chuffart
    wp_maps  = list()
234 b26ac9e7 Florent Chuffart
    for (strain in strains) {
235 b26ac9e7 Florent Chuffart
      wp_map_filename = paste(config$RESULTS_DIR, "/", strain, "_wp.tab", sep="")
236 b26ac9e7 Florent Chuffart
      wp_maps[[strain]] = mread.table(wp_map_filename, header=TRUE)
237 b26ac9e7 Florent Chuffart
    }
238 b26ac9e7 Florent Chuffart
    glo_fuzzy = list()
239 b26ac9e7 Florent Chuffart
    for (strain in strains) {
240 b26ac9e7 Florent Chuffart
      tmp_wp = wp_maps[[strain]]
241 b26ac9e7 Florent Chuffart
    	glo_fuzzy[[strain]] = apply(t(1:1:nrow(curs)), 2, function(cur_index) {
242 b26ac9e7 Florent Chuffart
        print(paste("Building fuzzy for", strain, cur_index, "/", nrow(curs)))
243 b26ac9e7 Florent Chuffart
        tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
244 b26ac9e7 Florent Chuffart
        tmp_fuzzy = get_intra_strain_fuzzy(tmp_wp, as.list(curs[cur_index,]), strain, config=config)
245 b26ac9e7 Florent Chuffart
        return(tmp_fuzzy)
246 b26ac9e7 Florent Chuffart
      })
247 b26ac9e7 Florent Chuffart
      fuzzy_map_filename = paste(config$RESULTS_DIR, "/", strain, "_fuzzy.tab", sep="")
248 b26ac9e7 Florent Chuffart
      write.table(do.call("rbind", glo_fuzzy[[strain]]), fuzzy_map_filename, row.names=FALSE, quote=FALSE)
249 b26ac9e7 Florent Chuffart
    }
250 b26ac9e7 Florent Chuffart
251 b26ac9e7 Florent Chuffart
  ### build unr maps
252 b26ac9e7 Florent Chuffart
  wp_maps  = list()
253 b26ac9e7 Florent Chuffart
  for (strain in strains) {
254 b26ac9e7 Florent Chuffart
    wp_map_filename = paste(config$RESULTS_DIR, "/", strain, "_wp.tab", sep="")
255 b26ac9e7 Florent Chuffart
    wp_maps[[strain]] = mread.table(wp_map_filename, header=TRUE)
256 b26ac9e7 Florent Chuffart
  }
257 b26ac9e7 Florent Chuffart
  fuzzy_maps = list()
258 b26ac9e7 Florent Chuffart
  for (strain in strains) {
259 b26ac9e7 Florent Chuffart
    fuzzy_map_filename = paste(config$RESULTS_DIR, "/", strain, "_fuzzy.tab", sep="")
260 b26ac9e7 Florent Chuffart
    fuzzy_maps[[strain]] = mread.table(fuzzy_map_filename, header=TRUE)
261 b26ac9e7 Florent Chuffart
  }
262 b26ac9e7 Florent Chuffart
  common_nuc_results = list()
263 b26ac9e7 Florent Chuffart
  for(combi in combis) {
264 b26ac9e7 Florent Chuffart
    outpout_joint_filename = paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_common_wp.tab", sep="")
265 b26ac9e7 Florent Chuffart
    tmp_common_wp = mread.table(outpout_joint_filename, header=TRUE)
266 b26ac9e7 Florent Chuffart
    common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = tmp_common_wp
267 b26ac9e7 Florent Chuffart
  }
268 b26ac9e7 Florent Chuffart
269 b26ac9e7 Florent Chuffart
  unr = list()
270 b26ac9e7 Florent Chuffart
  for(combi in combis) {
271 b26ac9e7 Florent Chuffart
    unr[[paste(combi[1], combi[2], sep="_")]] = do.call("rbind", apply(t(1:1:nrow(curs)), 2, function(cur_index) {
272 b26ac9e7 Florent Chuffart
      print(paste("Building UNRs", combi[1], combi[2], cur_index, "/", nrow(curs)))
273 b26ac9e7 Florent Chuffart
      cur = as.list(curs[cur_index,])
274 b26ac9e7 Florent Chuffart
      get_unrs(combi, cur, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config=config)
275 b26ac9e7 Florent Chuffart
    }))
276 b26ac9e7 Florent Chuffart
277 b26ac9e7 Florent Chuffart
    # unr for strain 1
278 b26ac9e7 Florent Chuffart
  	unr_map_filename_1 = paste(config$RESULTS_DIR, "/", combi[1], "_in_", combi[1], "_", combi[2], "_unr.tab", sep="")
279 b26ac9e7 Florent Chuffart
  	write.table(unr[[paste(combi[1], combi[2], sep="_")]][, c("chr", "lower_bound", "upper_bound", "cur_index", "index_nuc")], file=unr_map_filename_1, row.names=FALSE, quote=FALSE)
280 b26ac9e7 Florent Chuffart
281 b26ac9e7 Florent Chuffart
    # unr for strain 2
282 b26ac9e7 Florent Chuffart
    print(paste(combi[1], combi[2], "translation..."))
283 b26ac9e7 Florent Chuffart
    to_tr_unr = unr[[paste(combi[1], combi[2], sep="_")]]
284 b26ac9e7 Florent Chuffart
    tr_unr = apply(t(1:length(to_tr_unr[,1])), 2, function(i){
285 b26ac9e7 Florent Chuffart
      if (i %% 100 == 1) {print(paste(combi[1], combi[2], "translation... ", i , "/", length(to_tr_unr[,1])))}
286 b26ac9e7 Florent Chuffart
      tmp_unr = to_tr_unr[i,]
287 b26ac9e7 Florent Chuffart
      tmp_reg = list(name="foo", begin=tmp_unr$lower_bound, end=tmp_unr$upper_bound, chr=tmp_unr$chr, strain_ref = combi[1])
288 b26ac9e7 Florent Chuffart
      big_cur = as.list(curs[tmp_unr$cur_index,])
289 b26ac9e7 Florent Chuffart
      trs_tmp_reg = translate_cur(tmp_reg, combi[2], config=config, big_cur=big_cur)
290 b26ac9e7 Florent Chuffart
      if (!is.null(trs_tmp_reg)) {
291 b26ac9e7 Florent Chuffart
        to_tr_unr[i,]$chr = trs_tmp_reg$chr
292 b26ac9e7 Florent Chuffart
        to_tr_unr[i,]$lower_bound = min(trs_tmp_reg$begin, trs_tmp_reg$end)
293 b26ac9e7 Florent Chuffart
        to_tr_unr[i,]$upper_bound = max(trs_tmp_reg$begin, trs_tmp_reg$end)
294 b26ac9e7 Florent Chuffart
        return(to_tr_unr[i,])
295 b26ac9e7 Florent Chuffart
        } else {
296 b26ac9e7 Florent Chuffart
          return(NULL)
297 b26ac9e7 Florent Chuffart
        }
298 b26ac9e7 Florent Chuffart
      })
299 b26ac9e7 Florent Chuffart
    tr_unr = do.call("rbind", tr_unr)
300 b26ac9e7 Florent Chuffart
    # write it.
301 b26ac9e7 Florent Chuffart
  	unr_map_filename_2 = paste(config$RESULTS_DIR, "/", combi[2], "_in_", combi[1], "_", combi[2], "_unr.tab", sep="")
302 b26ac9e7 Florent Chuffart
  	write.table(tr_unr[, c("chr", "lower_bound", "upper_bound", "cur_index", "index_nuc")], file=unr_map_filename_2, row.names=FALSE, quote=FALSE)
303 b26ac9e7 Florent Chuffart
304 b26ac9e7 Florent Chuffart
    # joint the two tables
305 b26ac9e7 Florent Chuffart
    common_unr = cbind(tr_unr$cur_index, tr_unr$index_nuc, tr_unr$index_nuc)
306 b26ac9e7 Florent Chuffart
    common_unr = data.frame(common_unr, stringsAsFactors=FALSE)
307 b26ac9e7 Florent Chuffart
    names(common_unr) = c("cur_index", paste("index_nuc", combi[1], sep="_"), paste("index_nuc", combi[2], sep="_"))
308 b26ac9e7 Florent Chuffart
    outpout_joint_unr_filename = paste(config$RESULTS_DIR, "/", combi[1], "_", combi[2], "_common_unr.tab", sep="")
309 b26ac9e7 Florent Chuffart
    write.table(common_unr, file=outpout_joint_unr_filename, row.names=FALSE, quote=FALSE)
310 b26ac9e7 Florent Chuffart
  }
311 b26ac9e7 Florent Chuffart
312 b26ac9e7 Florent Chuffart
}
313 b26ac9e7 Florent Chuffart
314 b26ac9e7 Florent Chuffart
315 b26ac9e7 Florent Chuffart
316 b26ac9e7 Florent Chuffart
317 b26ac9e7 Florent Chuffart
318 b26ac9e7 Florent Chuffart
319 6b5a528c Florent Chuffart
FDR = structure(function#  False Discovery Rate
320 6b5a528c Florent Chuffart
### From a vector x of independent p-values, extract the cutoff corresponding to the specified FDR. See Benjamini & Hochberg 1995 paper
321 55c1cdff Florent Chuffart
##author<< Gael Yvert,
322 6b5a528c Florent Chuffart
(
323 6b5a528c Florent Chuffart
x, ##<< A vector x of independent p-values.
324 6b5a528c Florent Chuffart
FDR ##<< The specified FDR.
325 6b5a528c Florent Chuffart
) {
326 6b5a528c Florent Chuffart
  x <- sort(na.omit(x))
327 6b5a528c Florent Chuffart
  N = length(x)
328 6b5a528c Florent Chuffart
  i = 1;
329 6b5a528c Florent Chuffart
  while(N*x[i]/i < FDR & i <= N) i = i + 1; # we search for the highest i where Nrandom / Nobserved < FDR
330 6b5a528c Florent Chuffart
  if (i == 1)
331 6b5a528c Florent Chuffart
    return (NA)
332 6b5a528c Florent Chuffart
  else
333 6b5a528c Florent Chuffart
    return( x[i-1] )
334 6b5a528c Florent Chuffart
### Return the the corresponding cutoff.
335 6b5a528c Florent Chuffart
}, ex=function(){
336 6b5a528c Florent Chuffart
  print("example")
337 6b5a528c Florent Chuffart
})
338 6b5a528c Florent Chuffart
339 6b5a528c Florent Chuffart
340 6b5a528c Florent Chuffart
341 7e2d37e1 Florent Chuffart
llr_score_nvecs = structure(function # Likelihood ratio
342 7e2d37e1 Florent Chuffart
### Compute the log likelihood ratio of two or more set of value.
343 6b5a528c Florent Chuffart
(
344 7e2d37e1 Florent Chuffart
  xs ##<< list of vectors.
345 6b5a528c Florent Chuffart
) {
346 7e2d37e1 Florent Chuffart
  l = length(xs)
347 7e2d37e1 Florent Chuffart
  if (l < 1) {
348 7e2d37e1 Florent Chuffart
    return(NA)
349 7e2d37e1 Florent Chuffart
  }
350 7e2d37e1 Florent Chuffart
  if (l == 1) {
351 7e2d37e1 Florent Chuffart
    return(1)
352 7e2d37e1 Florent Chuffart
  }
353 7e2d37e1 Florent Chuffart
  sumllX = 0
354 7e2d37e1 Florent Chuffart
  for (i in 1:l) {
355 7e2d37e1 Florent Chuffart
    x = xs[[i]]
356 7e2d37e1 Florent Chuffart
  	if (length(x) <= 1) {
357 7e2d37e1 Florent Chuffart
  		return(NA)
358 7e2d37e1 Florent Chuffart
  	}
359 7e2d37e1 Florent Chuffart
    meanX = mean(x)
360 7e2d37e1 Florent Chuffart
    sdX = sd(x)
361 7e2d37e1 Florent Chuffart
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
362 7e2d37e1 Florent Chuffart
    sumllX = sumllX + llX
363 7e2d37e1 Florent Chuffart
  }
364 7e2d37e1 Florent Chuffart
  meanXglo = mean(unlist(xs))
365 7e2d37e1 Florent Chuffart
  sdXglo = sd(unlist(xs))
366 7e2d37e1 Florent Chuffart
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
367 7e2d37e1 Florent Chuffart
  ratio = sumllX - llXYZ
368 6b5a528c Florent Chuffart
  return(ratio)
369 7e2d37e1 Florent Chuffart
### Returns the log likelihood ratio.
370 6b5a528c Florent Chuffart
}, ex=function(){
371 e5603c3f Florent Chuffart
  # LLR score for 2 set of values
372 6b5a528c Florent Chuffart
  mean1=5; sd1=2; card2 = 250
373 6b5a528c Florent Chuffart
  mean2=6; sd2=3; card1 = 200
374 6b5a528c Florent Chuffart
  x1 = rnorm(card1, mean1, sd1)
375 55c1cdff Florent Chuffart
  x2 = rnorm(card2, mean2, sd2)
376 6b5a528c Florent Chuffart
  min = floor(min(c(x1,x2)))
377 6b5a528c Florent Chuffart
  max = ceiling(max(c(x1,x2)))
378 6b5a528c Florent Chuffart
  hist(c(x1,x2), xlim=c(min, max), breaks=min:max)
379 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
380 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
381 6b5a528c Florent Chuffart
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
382 7e2d37e1 Florent Chuffart
  llr_score_nvecs(list(x1,x2))
383 6b5a528c Florent Chuffart
 })
384 6b5a528c Florent Chuffart
385 6b5a528c Florent Chuffart
dfadd = structure(function# Adding list to a dataframe.
386 6b5a528c Florent Chuffart
### Add a list \emph{l} to a dataframe \emph{df}. Create it if \emph{df} is \emph{NULL}. Return the dataframe \emph{df}.
387 6b5a528c Florent Chuffart
	(df, ##<<  A dataframe
388 6b5a528c Florent Chuffart
		l ##<<  A list
389 6b5a528c Florent Chuffart
	) {
390 6b5a528c Florent Chuffart
  if (is.null(df)) {
391 6b5a528c Florent Chuffart
    df = data.frame(l,stringsAsFactors=FALSE)
392 6b5a528c Florent Chuffart
  } else {
393 6b5a528c Florent Chuffart
    df = rbind(df, data.frame(l,stringsAsFactors=FALSE))
394 6b5a528c Florent Chuffart
  }
395 6b5a528c Florent Chuffart
  return(df)
396 6b5a528c Florent Chuffart
### Return the dataframe \emph{df}.
397 6b5a528c Florent Chuffart
}, ex=function(){
398 6b5a528c Florent Chuffart
		## Here dataframe is NULL
399 6b5a528c Florent Chuffart
		print(df)
400 6b5a528c Florent Chuffart
		df = NULL
401 5bfac5a3 Florent Chuffart
402 6b5a528c Florent Chuffart
		# Initialize df
403 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1", key2 = "value2"))
404 6b5a528c Florent Chuffart
		print(df)
405 5bfac5a3 Florent Chuffart
406 6b5a528c Florent Chuffart
		# Adding elements to df
407 6b5a528c Florent Chuffart
		df = dfadd(df, list(key1 = "value1'", key2 = "value2'"))
408 6b5a528c Florent Chuffart
		print(df)
409 6b5a528c Florent Chuffart
})
410 6b5a528c Florent Chuffart
411 6b5a528c Florent Chuffart
412 6b5a528c Florent Chuffart
sign_from_strand = function(
413 55c1cdff Florent Chuffart
### Get the sign of strand
414 6b5a528c Florent Chuffart
strands) {
415 6b5a528c Florent Chuffart
	apply(t(strands), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
416 6b5a528c Florent Chuffart
### If strand in forward then returns 1 else returns -1
417 6b5a528c Florent Chuffart
}
418 6b5a528c Florent Chuffart
419 6b5a528c Florent Chuffart
flat_reads = function(
420 6b5a528c Florent Chuffart
### Extract reads coordinates from TempleteFilter input sequence
421 55c1cdff Florent Chuffart
reads, ##<< TemplateFilter input reads
422 6b5a528c Florent Chuffart
nuc_width ##<< Width used to shift F and R reads.
423 6b5a528c Florent Chuffart
) {
424 6b5a528c Florent Chuffart
	F_flatted_reads = unlist(apply(t(reads[reads$V3=="F",]),2,function(r){rep(as.integer(r[2]), r[4])}))
425 6b5a528c Florent Chuffart
	R_flatted_reads = unlist(apply(t(reads[reads$V3=="R",]),2,function(r){rep(as.integer(r[2]), r[4])}))
426 6b5a528c Florent Chuffart
	flatted_reads = c(F_flatted_reads + rep(nuc_width/2, length(F_flatted_reads)), R_flatted_reads - rep(nuc_width/2, length(R_flatted_reads))  )
427 6b5a528c Florent Chuffart
	return(list(F_flatted_reads, R_flatted_reads, flatted_reads))
428 6b5a528c Florent Chuffart
### Returns a list of F reads, R reads and joint/shifted F and R reads.
429 6b5a528c Florent Chuffart
}
430 6b5a528c Florent Chuffart
431 6b5a528c Florent Chuffart
432 6b5a528c Florent Chuffart
433 6b5a528c Florent Chuffart
filter_tf_outputs = function(# Filter TemplateFilter outputs
434 ec2936ea Florent Chuffart
### This function filters TemplateFilter outputs according, not only genome area observerved properties, but also correlation and overlapping threshold.
435 6b5a528c Florent Chuffart
tf_outputs, ##<< TemplateFilter outputs.
436 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
437 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
438 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
439 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
440 6b5a528c Florent Chuffart
ol_bp = 59, ##<< Overlap Threshold.
441 6b5a528c Florent Chuffart
corr_thres = 0.5 ##<< Correlation threshold.
442 6b5a528c Florent Chuffart
) {
443 6b5a528c Florent Chuffart
  if (x_min < 0) {
444 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= -x_max & tf_outputs$center + tf_outputs$width/2 <=  -x_min,]
445 6b5a528c Florent Chuffart
	} else {
446 0d5fbf02 Florent Chuffart
    tf_outputs = tf_outputs[tf_outputs$chr == paste("chr", chr, sep="") & tf_outputs$center - tf_outputs$width/2 >= x_min & tf_outputs$center + tf_outputs$width/2 <= x_max,]
447 6b5a528c Florent Chuffart
  }
448 6b5a528c Florent Chuffart
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
449 6b5a528c Florent Chuffart
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
450 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
451 7e2d37e1 Florent Chuffart
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
452 6b5a528c Florent Chuffart
  i = 1
453 55c1cdff Florent Chuffart
  while (i <= length(tf_outputs[,1])) {
454 7e2d37e1 Florent Chuffart
    lb = tf_outputs[i,]$lower_bound
455 7e2d37e1 Florent Chuffart
    ub = tf_outputs[i,]$upper_bound
456 7e2d37e1 Florent Chuffart
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
457 6b5a528c Florent Chuffart
    i = i+1
458 6b5a528c Florent Chuffart
  }
459 6b5a528c Florent Chuffart
  return(tf_outputs)
460 6b5a528c Florent Chuffart
### Returns filtered TemplateFilter Outputs
461 6b5a528c Florent Chuffart
}
462 6b5a528c Florent Chuffart
463 6b5a528c Florent Chuffart
464 6b5a528c Florent Chuffart
465 6b5a528c Florent Chuffart
filter_tf_inputs = function(# Filter TemplateFilter inputs
466 6b5a528c Florent Chuffart
### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
467 6b5a528c Florent Chuffart
inputs, ##<< TF inputs to be filtered.
468 6b5a528c Florent Chuffart
chr, ##<< Chromosome observed, here chr is an integer.
469 6b5a528c Florent Chuffart
x_min, ##<< Coordinate of the first bp observed.
470 6b5a528c Florent Chuffart
x_max, ##<< Coordinate of the last bp observed.
471 6b5a528c Florent Chuffart
nuc_width = 160, ##<< Nucleosome width.
472 6b5a528c Florent Chuffart
only_f = FALSE, ##<< Filter only F reads.
473 b8a95426 Florent Chuffart
only_r = FALSE, ##<< Filter only R reads.
474 3761ede2 Florent Chuffart
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
475 6b5a528c Florent Chuffart
) {
476 3761ede2 Florent Chuffart
	if (only_f) {
477 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
478 3761ede2 Florent Chuffart
	} else if (only_r) {
479 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]			
480 3761ede2 Florent Chuffart
	} else {
481 3761ede2 Florent Chuffart
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
482 3761ede2 Florent Chuffart
	}
483 b8a95426 Florent Chuffart
  if (!filter_for_coverage) {
484 3761ede2 Florent Chuffart
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
485 3761ede2 Florent Chuffart
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]    
486 b8a95426 Florent Chuffart
  }
487 3761ede2 Florent Chuffart
	return(inputs)
488 6b5a528c Florent Chuffart
### Returns filtred inputs.
489 6b5a528c Florent Chuffart
}
490 6b5a528c Florent Chuffart
491 6b5a528c Florent Chuffart
492 6b5a528c Florent Chuffart
493 3761ede2 Florent Chuffart
# filter_tf_inputs = function(# Filter TemplateFilter inputs
494 3761ede2 Florent Chuffart
# ### This function filters TemplateFilter inputs according genome area observed properties. It takes into account reads that are at the frontier of this area and the strand of these reads.
495 3761ede2 Florent Chuffart
# inputs, ##<< TF inputs to be filtered.
496 3761ede2 Florent Chuffart
# chr, ##<< Chromosome observed, here chr is an integer.
497 3761ede2 Florent Chuffart
# x_min, ##<< Coordinate of the first bp observed.
498 3761ede2 Florent Chuffart
# x_max, ##<< Coordinate of the last bp observed.
499 3761ede2 Florent Chuffart
# nuc_width = 160, ##<< Nucleosome width.
500 3761ede2 Florent Chuffart
# only_f = FALSE, ##<< Filter only F reads.
501 3761ede2 Florent Chuffart
# only_r = FALSE, ##<< Filter only R reads.
502 3761ede2 Florent Chuffart
# filter_for_coverage = FALSE, ##<< Does it filter for plot coverage?
503 3761ede2 Florent Chuffart
# USE_DPLYR = FALSE ##<< Use dplyr lib to filter reads.
504 3761ede2 Florent Chuffart
# ) {
505 3761ede2 Florent Chuffart
#   n = names(inputs)
506 3761ede2 Florent Chuffart
#
507 3761ede2 Florent Chuffart
#   if (!USE_DPLYR) {
508 3761ede2 Florent Chuffart
#     if (only_f) {
509 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
510 3761ede2 Florent Chuffart
#     } else if (only_r) {
511 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
512 3761ede2 Florent Chuffart
#     } else {
513 3761ede2 Florent Chuffart
#       inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
514 3761ede2 Florent Chuffart
#     }
515 3761ede2 Florent Chuffart
#   } else {
516 3761ede2 Florent Chuffart
#     names(inputs) = c("chr", "pos", "str", "lev")
517 3761ede2 Florent Chuffart
#     if (only_f) {
518 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr,  pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width)
519 3761ede2 Florent Chuffart
#     } else if (only_r) {
520 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width)
521 3761ede2 Florent Chuffart
#     } else {
522 3761ede2 Florent Chuffart
#       inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width)
523 3761ede2 Florent Chuffart
#     }
524 3761ede2 Florent Chuffart
#       # if (!filter_for_coverage) {
525 3761ede2 Florent Chuffart
#       #   inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
526 3761ede2 Florent Chuffart
#       #   inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max)
527 3761ede2 Florent Chuffart
#       #   inputs$corrected_inputs_coords = NULL
528 3761ede2 Florent Chuffart
#       # }
529 3761ede2 Florent Chuffart
#   }
530 3761ede2 Florent Chuffart
#
531 3761ede2 Florent Chuffart
#   if (!filter_for_coverage) {
532 3761ede2 Florent Chuffart
#     corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
533 3761ede2 Florent Chuffart
#     inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
534 3761ede2 Florent Chuffart
#   }
535 3761ede2 Florent Chuffart
#
536 3761ede2 Florent Chuffart
#   names(inputs_out) = n
537 3761ede2 Florent Chuffart
#   return(inputs_out)
538 3761ede2 Florent Chuffart
# ### Returns filtred inputs.
539 3761ede2 Florent Chuffart
# }
540 3761ede2 Florent Chuffart
541 3761ede2 Florent Chuffart
542 3761ede2 Florent Chuffart
543 6b5a528c Florent Chuffart
get_comp_strand = function(
544 6b5a528c Florent Chuffart
### Compute the complementatry strand.
545 6b5a528c Florent Chuffart
strand ##<< The original strand.
546 6b5a528c Florent Chuffart
) {
547 6b5a528c Florent Chuffart
	apply(t(strand),2, function(n){
548 55c1cdff Florent Chuffart
	  if (n=="a") {return("t")}
549 6b5a528c Florent Chuffart
		if (n=="t") {return("a")}
550 55c1cdff Florent Chuffart
		if (n=="c") {return("g")}
551 55c1cdff Florent Chuffart
		if (n=="g") {return("c")}
552 6b5a528c Florent Chuffart
	})
553 6b5a528c Florent Chuffart
### Returns the complementatry strand.
554 6b5a528c Florent Chuffart
}
555 6b5a528c Florent Chuffart
556 6b5a528c Florent Chuffart
557 55c1cdff Florent Chuffart
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
558 5badc2fd Florent Chuffart
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for "well positioned". This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
559 6b5a528c Florent Chuffart
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
560 e5603c3f Florent Chuffart
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
561 6b5a528c Florent Chuffart
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
562 6b5a528c Florent Chuffart
){
563 6b5a528c Florent Chuffart
	end_of_tracks = function(tracks) {
564 6b5a528c Florent Chuffart
		if (length(tracks) == 0) {
565 6b5a528c Florent Chuffart
			return(TRUE)
566 6b5a528c Florent Chuffart
		}
567 6b5a528c Florent Chuffart
	  for (lower_bound in tracks) {
568 6b5a528c Florent Chuffart
			if(!is.na(lower_bound)) {
569 6b5a528c Florent Chuffart
	      if (lower_bound < coord_max) {
570 6b5a528c Florent Chuffart
	        return(FALSE)
571 6b5a528c Florent Chuffart
	      }
572 6b5a528c Florent Chuffart
	  	}
573 6b5a528c Florent Chuffart
	  }
574 6b5a528c Florent Chuffart
	  return(TRUE)
575 6b5a528c Florent Chuffart
	}
576 d973538c Florent Chuffart
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
577 6b5a528c Florent Chuffart
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
578 6b5a528c Florent Chuffart
			new_cluster$wp = TRUE
579 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
580 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
581 6b5a528c Florent Chuffart
		  	clusters[[length(clusters) + 1]] = new_cluster
582 6b5a528c Florent Chuffart
				# print(new_cluster)
583 6b5a528c Florent Chuffart
		  }
584 6b5a528c Florent Chuffart
		} else {
585 6b5a528c Florent Chuffart
			new_cluster$wp = FALSE
586 6b5a528c Florent Chuffart
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
587 6b5a528c Florent Chuffart
			if (is.null(min_nuc_center) | ((min_nuc_center <= center) & (center < max_nuc_center))) {
588 6b5a528c Florent Chuffart
			  clusters[[length(clusters) + 1]] = new_cluster
589 6b5a528c Florent Chuffart
			}
590 6b5a528c Florent Chuffart
		}
591 6b5a528c Florent Chuffart
		return(clusters)
592 6b5a528c Florent Chuffart
	}
593 6b5a528c Florent Chuffart
	strain = samples[[1]]$strain
594 7e2d37e1 Florent Chuffart
	llr_scores = c()
595 6b5a528c Florent Chuffart
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
596 6b5a528c Florent Chuffart
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
597 6b5a528c Florent Chuffart
  # compute clusters
598 6b5a528c Florent Chuffart
  clusters = list()
599 6b5a528c Florent Chuffart
  cluster_contents = list()
600 6b5a528c Florent Chuffart
  # Init reader
601 6b5a528c Florent Chuffart
  indexes = c()
602 6b5a528c Florent Chuffart
  track_readers = c()
603 6b5a528c Florent Chuffart
  current_nuc = NULL
604 7e2d37e1 Florent Chuffart
	llr_score = llr_thres + 1
605 55c1cdff Florent Chuffart
  # Read nucs from TF outputs
606 6b5a528c Florent Chuffart
  tf_outs = list()
607 6b5a528c Florent Chuffart
	i = 1
608 6b5a528c Florent Chuffart
  for (sample in samples) {
609 6b5a528c Florent Chuffart
		tf_outs[[i]] = sample$outputs
610 55c1cdff Florent Chuffart
		tf_outs[[i]] = tf_outs[[i]][order(tf_outs[[i]]$center),]
611 6b5a528c Florent Chuffart
    indexes[i] = 1
612 6b5a528c Florent Chuffart
		if (is.na(tf_outs[[i]][indexes[i],]$center)) {
613 5bfac5a3 Florent Chuffart
      track_readers[i] = coord_max
614 6b5a528c Florent Chuffart
	  } else {
615 5bfac5a3 Florent Chuffart
      track_readers[i] = tf_outs[[i]][indexes[i],]$center
616 6b5a528c Florent Chuffart
		}
617 5bfac5a3 Florent Chuffart
		i = i+1
618 6b5a528c Florent Chuffart
  }
619 d973538c Florent Chuffart
  nb_tracks = length(tf_outs)
620 6b5a528c Florent Chuffart
	# print(track_readers)
621 6b5a528c Florent Chuffart
  new_cluster = NULL
622 6b5a528c Florent Chuffart
  nb_nucs_in_cluster = 0
623 6b5a528c Florent Chuffart
  nuc_from_track = c()
624 d973538c Florent Chuffart
  for (i in 1:nb_tracks){
625 6b5a528c Florent Chuffart
    nuc_from_track[i] = FALSE
626 6b5a528c Florent Chuffart
  }
627 6b5a528c Florent Chuffart
  # Start clustering
628 6b5a528c Florent Chuffart
  while (!end_of_tracks(track_readers)) {
629 6b5a528c Florent Chuffart
    new_center = min(track_readers)
630 5bfac5a3 Florent Chuffart
		current_track = which(track_readers == new_center)[1]
631 6b5a528c Florent Chuffart
    new_nuc = as.list(tf_outs[[current_track]][indexes[current_track],])
632 6b5a528c Florent Chuffart
		new_nuc$chr = substr(new_nuc$chr,4,1000000L)
633 6b5a528c Florent Chuffart
		new_nuc$inputs = samples[[current_track]]$inputs
634 6b5a528c Florent Chuffart
		new_nuc$chr = samples[[current_track]]$roi$chr
635 6b5a528c Florent Chuffart
		new_nuc$track = current_track
636 5bfac5a3 Florent Chuffart
637 55c1cdff Florent Chuffart
		new_nuc$inputs = filter_tf_inputs(samples[[current_track]]$inputs, new_nuc$chr, new_nuc$lower_bound, new_nuc$upper_bound, new_nuc$width)
638 6b5a528c Florent Chuffart
		flatted_reads = flat_reads(new_nuc$inputs, new_nuc$width)
639 6b5a528c Florent Chuffart
		new_nuc$original_reads = flatted_reads[[3]]
640 6b5a528c Florent Chuffart
641 6b5a528c Florent Chuffart
    new_upper_bound = new_nuc$upper_bound
642 6b5a528c Florent Chuffart
643 6b5a528c Florent Chuffart
    if (!is.null(current_nuc)) {
644 7e2d37e1 Florent Chuffart
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
645 7e2d37e1 Florent Chuffart
			llr_scores = c(llr_scores,llr_score)
646 6b5a528c Florent Chuffart
		}
647 7e2d37e1 Florent Chuffart
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
648 7e2d37e1 Florent Chuffart
		if (is.na(llr_score)) {
649 7e2d37e1 Florent Chuffart
			llr_score = llr_thres + 1
650 6b5a528c Florent Chuffart
		}
651 7e2d37e1 Florent Chuffart
		# Store llr_score
652 7e2d37e1 Florent Chuffart
		new_nuc$llr_score = llr_score
653 7e2d37e1 Florent Chuffart
	  if (llr_score < llr_thres) {
654 6b5a528c Florent Chuffart
      # aggregate to current cluster
655 6b5a528c Florent Chuffart
      #   update bound
656 6b5a528c Florent Chuffart
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
657 55c1cdff Florent Chuffart
        new_cluster$upper_bound = new_nuc$upper_bound
658 6b5a528c Florent Chuffart
      }
659 6b5a528c Florent Chuffart
      if (new_nuc$lower_bound < new_cluster$lower_bound) {
660 55c1cdff Florent Chuffart
        new_cluster$lower_bound = new_nuc$lower_bound
661 6b5a528c Florent Chuffart
      }
662 6b5a528c Florent Chuffart
      #   add nucleosome to current cluster
663 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
664 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
665 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
666 6b5a528c Florent Chuffart
    } else {
667 6b5a528c Florent Chuffart
			if (!is.null(new_cluster)) {
668 6b5a528c Florent Chuffart
        # store old cluster
669 d973538c Florent Chuffart
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
670 6b5a528c Florent Chuffart
			}
671 6b5a528c Florent Chuffart
      # Reinit current cluster composition stuff
672 6b5a528c Florent Chuffart
      nb_nucs_in_cluster = 0
673 6b5a528c Florent Chuffart
      nuc_from_track = c()
674 d973538c Florent Chuffart
      for (i in 1:nb_tracks){
675 6b5a528c Florent Chuffart
        nuc_from_track[i] = FALSE
676 6b5a528c Florent Chuffart
      }
677 55c1cdff Florent Chuffart
      # create new cluster
678 6b5a528c Florent Chuffart
      new_cluster = list(lower_bound=new_nuc$low, upper_bound=new_nuc$up, chr=new_nuc$chr, strain_ref=strain , nucs=list())
679 6b5a528c Florent Chuffart
      # update upper bound
680 6b5a528c Florent Chuffart
      current_upper_bound = new_upper_bound
681 6b5a528c Florent Chuffart
      # add nucleosome to current cluster
682 55c1cdff Florent Chuffart
      nb_nucs_in_cluster = nb_nucs_in_cluster + 1
683 6b5a528c Florent Chuffart
      nuc_from_track[current_track] = TRUE
684 6b5a528c Florent Chuffart
			new_cluster$nucs[[length(new_cluster$nucs)+1]] = new_nuc
685 5bfac5a3 Florent Chuffart
686 6b5a528c Florent Chuffart
		}
687 5bfac5a3 Florent Chuffart
688 6b5a528c Florent Chuffart
		current_nuc = new_nuc
689 6b5a528c Florent Chuffart
690 6b5a528c Florent Chuffart
    # update indexes
691 55c1cdff Florent Chuffart
    if (indexes[current_track] < length(tf_outs[[current_track]]$center)) {
692 6b5a528c Florent Chuffart
      indexes[current_track] = indexes[current_track] + 1
693 6b5a528c Florent Chuffart
      # update track
694 6b5a528c Florent Chuffart
      track_readers[current_track] = tf_outs[[current_track]][indexes[current_track],]$center
695 6b5a528c Florent Chuffart
    } else {
696 6b5a528c Florent Chuffart
      # update track
697 6b5a528c Florent Chuffart
      track_readers[current_track] = coord_max
698 6b5a528c Florent Chuffart
    }
699 6b5a528c Florent Chuffart
  }
700 6b5a528c Florent Chuffart
  # store last cluster
701 6b5a528c Florent Chuffart
  if (!is.null(new_cluster)) {
702 6b5a528c Florent Chuffart
    # store old cluster
703 d973538c Florent Chuffart
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
704 6b5a528c Florent Chuffart
  }
705 7e2d37e1 Florent Chuffart
	return(list(clusters, llr_scores))
706 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
707 5bfac5a3 Florent Chuffart
}, ex=function(){
708 6b5a528c Florent Chuffart
	# Dealing with a region of interest
709 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
710 6b5a528c Florent Chuffart
	samples = list()
711 6b5a528c Florent Chuffart
	for (i in 1:3) {
712 6b5a528c Florent Chuffart
		# Create TF output
713 6b5a528c Florent Chuffart
		tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
714 6b5a528c Florent Chuffart
		outputs = dfadd(NULL,tf_nuc)
715 6b5a528c Florent Chuffart
		outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
716 6b5a528c Florent Chuffart
		# Generate corresponding reads
717 6b5a528c Florent Chuffart
		nb_reads = round(runif(1,170,230))
718 6b5a528c Florent Chuffart
		reads = round(rnorm(nb_reads, tf_nuc$center,20))
719 6b5a528c Florent Chuffart
		u_reads = sort(unique(reads))
720 6b5a528c Florent Chuffart
		strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
721 6b5a528c Florent Chuffart
		counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
722 6b5a528c Florent Chuffart
		shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
723 6b5a528c Florent Chuffart
		u_reads = u_reads + shifts
724 55c1cdff Florent Chuffart
		inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
725 55c1cdff Florent Chuffart
		                         "V2" = u_reads,
726 55c1cdff Florent Chuffart
														 "V3" = strands,
727 6b5a528c Florent Chuffart
														 "V4" = counts), stringsAsFactors=FALSE)
728 6b5a528c Florent Chuffart
		samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain="strain_ex", total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
729 6b5a528c Florent Chuffart
	}
730 6b5a528c Florent Chuffart
	print(aggregate_intra_strain_nucs(samples))
731 6b5a528c Florent Chuffart
})
732 6b5a528c Florent Chuffart
733 7646593d Florent Chuffart
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
734 7646593d Florent Chuffart
### This function builds a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
735 7646593d Florent Chuffart
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
736 d973538c Florent Chuffart
cur_index, ##<< the index of the roi involved
737 d973538c Florent Chuffart
nb_tracks=3 ##<< the number of replicates
738 7646593d Florent Chuffart
) {
739 7646593d Florent Chuffart
	if  (length(partial_strain_maps) == 0 ){
740 4016229d Florent Chuffart
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
741 55c1cdff Florent Chuffart
    tmp_strain_maps = list()
742 7646593d Florent Chuffart
	} else {
743 7646593d Florent Chuffart
		tmp_strain_map = apply(t(1:length(partial_strain_maps)), 2, function(i){
744 7646593d Florent Chuffart
			tmp_nuc = partial_strain_maps[[i]]
745 7646593d Florent Chuffart
			tmp_nuc_as_list = list()
746 7646593d Florent Chuffart
			tmp_nuc_as_list[["chr"]] = tmp_nuc[["chr"]]
747 7646593d Florent Chuffart
			tmp_nuc_as_list[["lower_bound"]] = ceiling(tmp_nuc[["lower_bound"]])
748 7646593d Florent Chuffart
			tmp_nuc_as_list[["upper_bound"]] = floor(tmp_nuc[["upper_bound"]])
749 4016229d Florent Chuffart
			tmp_nuc_as_list[["cur_index"]] = cur_index
750 7646593d Florent Chuffart
			tmp_nuc_as_list[["index_nuc"]] = i
751 7646593d Florent Chuffart
			tmp_nuc_as_list[["wp"]] = as.integer(tmp_nuc$wp)
752 7646593d Florent Chuffart
			all_original_reads = c()
753 7646593d Florent Chuffart
			for (j in 1:length(tmp_nuc$nucs)) {
754 7646593d Florent Chuffart
				all_original_reads = c(all_original_reads, tmp_nuc$nucs[[j]]$original_reads)
755 7646593d Florent Chuffart
			}
756 7646593d Florent Chuffart
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
757 0d5fbf02 Florent Chuffart
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
758 7646593d Florent Chuffart
			if (tmp_nuc$wp) {
759 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
760 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5)          
761 d973538c Florent Chuffart
        }
762 7646593d Florent Chuffart
			} else {
763 d973538c Florent Chuffart
        for (i in 1:(nb_tracks-1)) {
764 d973538c Florent Chuffart
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA
765 d973538c Florent Chuffart
        }
766 7646593d Florent Chuffart
			}
767 7646593d Florent Chuffart
      return(tmp_nuc_as_list)
768 7646593d Florent Chuffart
    })
769 7646593d Florent Chuffart
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
770 7646593d Florent Chuffart
	}
771 21b8928f Florent Chuffart
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
772 7646593d Florent Chuffart
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
773 7646593d Florent Chuffart
}
774 6b5a528c Florent Chuffart
775 6b5a528c Florent Chuffart
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
776 e5603c3f Florent Chuffart
### This function aligns nucleosomes between two strains for a given genome region.
777 55c1cdff Florent Chuffart
replicates, ##<< Set of replicates, ideally 3 per strain.
778 e5603c3f Florent Chuffart
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
779 e5603c3f Florent Chuffart
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
780 6b5a528c Florent Chuffart
corr_thres=0.5, ##<< Correlation threshold.
781 e5603c3f Florent Chuffart
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
782 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
783 5bfac5a3 Florent Chuffart
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
784 6b5a528c Florent Chuffart
) {
785 6b5a528c Florent Chuffart
	if (length(replicates) < 2) {
786 6b5a528c Florent Chuffart
		stop("ERROR, align_inter_strain_nucs needs 2 replicate sets.")
787 6b5a528c Florent Chuffart
	} else if (length(replicates) > 2) {
788 5bfac5a3 Florent Chuffart
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
789 6b5a528c Florent Chuffart
	}
790 6b5a528c Florent Chuffart
	common_nuc = NULL
791 7e2d37e1 Florent Chuffart
	llr_scores = c()
792 6b5a528c Florent Chuffart
	chr = replicates[[1]][[1]]$roi$chr
793 6b5a528c Florent Chuffart
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
794 6b5a528c Florent Chuffart
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
795 6b5a528c Florent Chuffart
	strain_ref1 = replicates[[1]][[1]]$strain
796 6b5a528c Florent Chuffart
	strain_ref2 = replicates[[2]][[1]]$strain
797 4016229d Florent Chuffart
	big_cur = replicates[[1]][[1]]$roi
798 4016229d Florent Chuffart
  orig_big_cur = replicates[[1]][[1]]$orig_roi
799 4016229d Florent Chuffart
	if(big_cur$end - big_cur$begin < 0) {
800 4016229d Florent Chuffart
		tmp_begin = big_cur$begin
801 4016229d Florent Chuffart
		big_cur$begin =  big_cur$end
802 4016229d Florent Chuffart
		big_cur$end =  tmp_begin
803 6b5a528c Florent Chuffart
	}
804 6b5a528c Florent Chuffart
	# GO!
805 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref1)) {
806 6b5a528c Florent Chuffart
		wp_nucs_strain_ref1 = aggregate_intra_strain_nucs(replicates[[1]], ...)[[1]]
807 6b5a528c Florent Chuffart
	}
808 6b5a528c Florent Chuffart
	if (is.null(wp_nucs_strain_ref2)) {
809 6b5a528c Florent Chuffart
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
810 6b5a528c Florent Chuffart
  }
811 6b5a528c Florent Chuffart
	lws = c()
812 6b5a528c Florent Chuffart
	ups = c()
813 6b5a528c Florent Chuffart
	for (na in wp_nucs_strain_ref2) {
814 6b5a528c Florent Chuffart
		lws = c(lws, na$lower_bound)
815 6b5a528c Florent Chuffart
		ups = c(ups, na$upper_bound)
816 6b5a528c Florent Chuffart
	}
817 6b5a528c Florent Chuffart
818 5bfac5a3 Florent Chuffart
	print(paste("Exploring chr" , chr , ", " , length(wp_nucs_strain_ref1) , ", [" , min_nuc_center , ", " , max_nuc_center , "] nucs...", sep=""))
819 6b5a528c Florent Chuffart
	roi_strain_ref1 = NULL
820 6b5a528c Florent Chuffart
	roi_strain_ref2 = NULL
821 6b5a528c Florent Chuffart
	if (length(wp_nucs_strain_ref1) > 0) {
822 6b5a528c Florent Chuffart
		for(index_nuc_strain_ref1 in 1:length(wp_nucs_strain_ref1)){
823 6b5a528c Florent Chuffart
			# print(paste("" , index_nuc_strain_ref1 , "/" , length(wp_nucs_strain_ref1), sep=""))
824 6b5a528c Florent Chuffart
			nuc_strain_ref1 = wp_nucs_strain_ref1[[index_nuc_strain_ref1]]
825 6b5a528c Florent Chuffart
			# Filtering on Well Positionned
826 883439d6 Florent Chuffart
			if (nuc_strain_ref1$wp) {
827 6b5a528c Florent Chuffart
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
828 4016229d Florent Chuffart
				roi_strain_ref2 = translate_cur(roi_strain_ref1, strain_ref2, big_cur=orig_big_cur, config=config)
829 1d833b97 Florent Chuffart
        if (!is.null(roi_strain_ref2)){
830 6b5a528c Florent Chuffart
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
831 6b5a528c Florent Chuffart
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
832 6b5a528c Florent Chuffart
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
833 6b5a528c Florent Chuffart
					matching_nas = which( lower_bound_roi_strain_ref2 <= ups & lws <= upper_bound_roi_strain_ref2)
834 6b5a528c Florent Chuffart
					for (index_nuc_strain_ref2 in matching_nas) {
835 6b5a528c Florent Chuffart
						nuc_strain_ref2 = wp_nucs_strain_ref2[[index_nuc_strain_ref2]]
836 6b5a528c Florent Chuffart
						# Filtering on Well Positionned
837 55c1cdff Florent Chuffart
    				nuc_strain_ref2_to_roi = list(begin=nuc_strain_ref2$lower_bound, end=nuc_strain_ref2$upper_bound, chr=nuc_strain_ref2$chr, strain_ref = strain_ref2)
838 4016229d Florent Chuffart
						if (!is.null(translate_cur(nuc_strain_ref2_to_roi, strain_ref1, big_cur=orig_big_cur, config=config)) &
839 1e34bb1f Florent Chuffart
                nuc_strain_ref2$wp) {
840 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score and collecting reads
841 6b5a528c Florent Chuffart
							SKIP = FALSE
842 6b5a528c Florent Chuffart
							# TODO: This for loop could be done before working on strain_ref2. Isn't it?
843 6b5a528c Florent Chuffart
							reads_strain_ref1 = c()
844 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref1$nucs){
845 5bfac5a3 Florent Chuffart
								reads_strain_ref1 = c(reads_strain_ref1, nuc$original_reads)
846 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
847 6b5a528c Florent Chuffart
									SKIP = TRUE
848 6b5a528c Florent Chuffart
								}
849 6b5a528c Florent Chuffart
							}
850 6b5a528c Florent Chuffart
							reads_strain_ref2 = c()
851 6b5a528c Florent Chuffart
							for (nuc in nuc_strain_ref2$nucs){
852 5bfac5a3 Florent Chuffart
								reads_strain_ref2 = c(reads_strain_ref2, nuc$original_reads)
853 6b5a528c Florent Chuffart
								if (nuc$corr < corr_thres) {
854 6b5a528c Florent Chuffart
									SKIP = TRUE
855 6b5a528c Florent Chuffart
								}
856 6b5a528c Florent Chuffart
							}
857 5bfac5a3 Florent Chuffart
							# Filtering on correlation Score
858 6b5a528c Florent Chuffart
							if (!SKIP) {
859 6b5a528c Florent Chuffart
								# tranlation of reads into strain 2 coords
860 6b5a528c Florent Chuffart
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
861 6b5a528c Florent Chuffart
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
862 7e2d37e1 Florent Chuffart
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
863 7e2d37e1 Florent Chuffart
								llr_scores = c(llr_scores, llr_score)
864 e5603c3f Florent Chuffart
								# Filtering on LLR Score
865 67157457 Florent Chuffart
                if (llr_score < llr_thres) {
866 6b5a528c Florent Chuffart
									tmp_nuc = list()
867 6b5a528c Florent Chuffart
									# strain_ref1
868 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
869 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$lower_bound
870 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref1, sep="")]] = nuc_strain_ref1$upper_bound
871 6b5a528c Florent Chuffart
									tmp_nuc[[paste("mean_", strain_ref1, sep="")]] = signif(mean(reads_strain_ref1),5)
872 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref1, sep="")]] = signif(sd(reads_strain_ref1),5)
873 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref1, sep="")]] = length(reads_strain_ref1)
874 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref1, sep="")]] = index_nuc_strain_ref1
875 6b5a528c Florent Chuffart
									# strain_ref2
876 6b5a528c Florent Chuffart
									tmp_nuc[[paste("chr_", strain_ref2, sep="")]] = roi_strain_ref2$chr
877 6b5a528c Florent Chuffart
									tmp_nuc[[paste("lower_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$lower_bound
878 6b5a528c Florent Chuffart
									tmp_nuc[[paste("upper_bound_", strain_ref2, sep="")]] = nuc_strain_ref2$upper_bound
879 6b5a528c Florent Chuffart
									tmp_nuc[[paste("means_", strain_ref2, sep="")]] = signif(mean(reads_strain_ref2),5)
880 6b5a528c Florent Chuffart
									tmp_nuc[[paste("sd_", strain_ref2, sep="")]] = signif(sd(reads_strain_ref2),5)
881 6b5a528c Florent Chuffart
									tmp_nuc[[paste("nb_reads_", strain_ref2, sep="")]] = length(reads_strain_ref2)
882 6b5a528c Florent Chuffart
									tmp_nuc[[paste("index_nuc_", strain_ref2, sep="")]] = index_nuc_strain_ref2
883 6b5a528c Florent Chuffart
									# common
884 7e2d37e1 Florent Chuffart
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
885 dadb6a4d Florent Chuffart
                  tmp_nuc[["common_wp_pval"]] = signif(1-pchisq(2*tmp_nuc[["llr_score"]], df=2),5)
886 dadb6a4d Florent Chuffart
									tmp_nuc[["diff"]] = abs(abs((roi_strain_ref2$begin - roi_strain_ref2$end)) / 2 - abs((nuc_strain_ref2$lower_bound - nuc_strain_ref2$upper_bound)) / 2)
887 6b5a528c Florent Chuffart
									common_nuc = dfadd(common_nuc, tmp_nuc)
888 ec2936ea Florent Chuffart
                }
889 6b5a528c Florent Chuffart
							}
890 6b5a528c Florent Chuffart
						}
891 5bfac5a3 Florent Chuffart
					}
892 6b5a528c Florent Chuffart
				} else {
893 6b5a528c Florent Chuffart
		      print("WARNING! No roi for strain ref 2.")
894 6b5a528c Florent Chuffart
			  }
895 6b5a528c Florent Chuffart
		  }
896 6b5a528c Florent Chuffart
		}
897 5bfac5a3 Florent Chuffart
898 67157457 Florent Chuffart
    if(length(unique(common_nuc[,1:3])[,1]) != length((common_nuc[,1:3])[,1])) {
899 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,1:3][-length(common_nuc[,1]),] ==  common_nuc[,1:3][-1,] ,1,sum) == 3)
900 67157457 Florent Chuffart
      to_remove_list = c()
901 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
902 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
903 67157457 Florent Chuffart
          to_remove = index_redundant[i]
904 67157457 Florent Chuffart
        }   else {
905 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
906 67157457 Florent Chuffart
        }
907 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
908 67157457 Florent Chuffart
      }
909 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
910 67157457 Florent Chuffart
    }
911 5bfac5a3 Florent Chuffart
912 67157457 Florent Chuffart
    if(length(unique(common_nuc[,8:10])[,1]) != length((common_nuc[,8:10])[,1])) {
913 67157457 Florent Chuffart
      index_redundant = which(apply(common_nuc[,8:10][-length(common_nuc[,1]),] == common_nuc[,8:10][-1,] ,1,sum) == 3)
914 67157457 Florent Chuffart
      to_remove_list = c()
915 67157457 Florent Chuffart
      for (i in 1:length(index_redundant)) {
916 67157457 Florent Chuffart
        if (common_nuc[index_redundant[i],15] < common_nuc[index_redundant[i]+1,15]) {
917 67157457 Florent Chuffart
          to_remove = index_redundant[i]
918 67157457 Florent Chuffart
        }   else {
919 67157457 Florent Chuffart
          to_remove = index_redundant[i] + 1
920 67157457 Florent Chuffart
        }
921 67157457 Florent Chuffart
        to_remove_list = c(to_remove_list, to_remove)
922 67157457 Florent Chuffart
      }
923 67157457 Florent Chuffart
      common_nuc = common_nuc[-to_remove_list,]
924 67157457 Florent Chuffart
    }
925 5bfac5a3 Florent Chuffart
926 7e2d37e1 Florent Chuffart
		return(list(common_nuc, llr_scores))
927 6b5a528c Florent Chuffart
	} else {
928 6b5a528c Florent Chuffart
		print("WARNING, no nucs for strain_ref1.")
929 6b5a528c Florent Chuffart
		return(NULL)
930 6b5a528c Florent Chuffart
	}
931 7e2d37e1 Florent Chuffart
### Returns a list of clusterized nucleosomes, and all computed llr scores.
932 5bfac5a3 Florent Chuffart
}, ex=function(){
933 1d833b97 Florent Chuffart
934 4016229d Florent Chuffart
    # Define new translate_cur function...
935 4016229d Florent Chuffart
    translate_cur = function(roi, strain2, big_cur=NULL, config=NULL) {
936 1d833b97 Florent Chuffart
      return(roi)
937 1d833b97 Florent Chuffart
    }
938 1d833b97 Florent Chuffart
    # Binding it by uncomment follwing lines.
939 4016229d Florent Chuffart
    unlockBinding("translate_cur", as.environment("package:nucleominer"))
940 4016229d Florent Chuffart
    unlockBinding("translate_cur", getNamespace("nucleominer"))
941 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, "package:nucleominer")
942 4016229d Florent Chuffart
    assign("translate_cur", translate_cur, getNamespace("nucleominer"))
943 4016229d Florent Chuffart
    lockBinding("translate_cur", getNamespace("nucleominer"))
944 4016229d Florent Chuffart
    lockBinding("translate_cur", as.environment("package:nucleominer"))
945 1d833b97 Florent Chuffart
946 6b5a528c Florent Chuffart
	# Dealing with a region of interest
947 6b5a528c Florent Chuffart
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
948 4016229d Florent Chuffart
	roi2 = translate_cur(roi, roi$strain_ref1)
949 6b5a528c Florent Chuffart
	replicates = list()
950 6b5a528c Florent Chuffart
	for (j in 1:2) {
951 6b5a528c Florent Chuffart
		samples = list()
952 6b5a528c Florent Chuffart
		for (i in 1:3) {
953 6b5a528c Florent Chuffart
			# Create TF output
954 6b5a528c Florent Chuffart
			tf_nuc = list("chr"=paste("chr", roi$chr, sep=""), "center"=(roi$end + roi$begin)/2, "width"= 150, "correlation.score"= 0.9)
955 6b5a528c Florent Chuffart
			outputs = dfadd(NULL,tf_nuc)
956 6b5a528c Florent Chuffart
			outputs = filter_tf_outputs(outputs, roi$chr, roi$begin, roi$end)
957 6b5a528c Florent Chuffart
			# Generate corresponding reads
958 6b5a528c Florent Chuffart
			nb_reads = round(runif(1,170,230))
959 6b5a528c Florent Chuffart
			reads = round(rnorm(nb_reads, tf_nuc$center,20))
960 6b5a528c Florent Chuffart
			u_reads = sort(unique(reads))
961 6b5a528c Florent Chuffart
			strands = sample(c(rep("R",ceiling(length(u_reads)/2)),rep("F",floor(length(u_reads)/2))))
962 6b5a528c Florent Chuffart
			counts = apply(t(u_reads), 2, function(r) { sum(reads == r)})
963 6b5a528c Florent Chuffart
			shifts = apply(t(strands), 2, function(s) { if (s == "F") return(-tf_nuc$width/2) else return(tf_nuc$width/2)})
964 6b5a528c Florent Chuffart
			u_reads = u_reads + shifts
965 55c1cdff Florent Chuffart
			inputs = data.frame(list("V1" = rep(roi$chr, length(u_reads)),
966 55c1cdff Florent Chuffart
			                         "V2" = u_reads,
967 55c1cdff Florent Chuffart
															 "V3" = strands,
968 6b5a528c Florent Chuffart
															 "V4" = counts), stringsAsFactors=FALSE)
969 6b5a528c Florent Chuffart
			samples[[length(samples) + 1]] = list(id=1, marker="Mnase_Seq", strain=paste("strain_ex",j,sep=""), total_reads = 10000000, roi=roi, inputs=inputs, outputs=outputs)
970 6b5a528c Florent Chuffart
		}
971 6b5a528c Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
972 6b5a528c Florent Chuffart
	}
973 6b5a528c Florent Chuffart
	print(align_inter_strain_nucs(replicates))
974 6b5a528c Florent Chuffart
})
975 6b5a528c Florent Chuffart
976 6b5a528c Florent Chuffart
977 6b5a528c Florent Chuffart
978 6b5a528c Florent Chuffart
979 6b5a528c Florent Chuffart
980 6b5a528c Florent Chuffart
981 6b5a528c Florent Chuffart
982 6b5a528c Florent Chuffart
983 6b5a528c Florent Chuffart
984 6b5a528c Florent Chuffart
985 6b5a528c Florent Chuffart
986 6b5a528c Florent Chuffart
987 6b5a528c Florent Chuffart
988 6b5a528c Florent Chuffart
989 6b5a528c Florent Chuffart
990 6b5a528c Florent Chuffart
991 6b5a528c Florent Chuffart
992 6b5a528c Florent Chuffart
993 6b5a528c Florent Chuffart
994 6b5a528c Florent Chuffart
995 6b5a528c Florent Chuffart
996 6b5a528c Florent Chuffart
997 6b5a528c Florent Chuffart
998 b26ac9e7 Florent Chuffart
mread.table = 
999 b26ac9e7 Florent Chuffart
### memoised version of read.table
1000 b26ac9e7 Florent Chuffart
memoise(
1001 b26ac9e7 Florent Chuffart
  function(file, ...){
1002 b26ac9e7 Florent Chuffart
  print(paste("Memoising ", file, "...", sep=""))
1003 b26ac9e7 Florent Chuffart
  read.table(file, ...)
1004 b26ac9e7 Florent Chuffart
})
1005 b26ac9e7 Florent Chuffart
mread.fasta = memoise(function( 
1006 b26ac9e7 Florent Chuffart
file, ...){
1007 b26ac9e7 Florent Chuffart
  print(paste("Memoising ", file, "...", sep=""))
1008 b26ac9e7 Florent Chuffart
  read.fasta(file, ...)
1009 b26ac9e7 Florent Chuffart
})
1010 6b5a528c Florent Chuffart
1011 6b5a528c Florent Chuffart
1012 6b5a528c Florent Chuffart
fetch_mnase_replicates = function(# Prefetch data
1013 6b5a528c Florent Chuffart
### Fetch and filter inputs and outpouts per region of interest. Organize it per replicates.
1014 6b5a528c Florent Chuffart
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
1015 6b5a528c Florent Chuffart
roi, ##<< Region of interest.
1016 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1017 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1018 6b5a528c Florent Chuffart
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
1019 6b5a528c Florent Chuffart
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
1020 6b5a528c Florent Chuffart
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
1021 6b5a528c Florent Chuffart
) {
1022 6b5a528c Florent Chuffart
	samples=list()
1023 6b5a528c Florent Chuffart
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
1024 6b5a528c Florent Chuffart
	for (i in samples_ids) {
1025 6b5a528c Florent Chuffart
		sample = as.list(all_samples[all_samples$id==i,])
1026 a0b91fee Florent Chuffart
    sample$orig_roi = roi
1027 4016229d Florent Chuffart
    sample$roi = translate_cur(roi, sample$strain, config = config)
1028 6b5a528c Florent Chuffart
		if (get_genome) {
1029 6b5a528c Florent Chuffart
			# Get Genome
1030 b26ac9e7 Florent Chuffart
      sample$roi$genome = mread.fasta(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]])[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1031 6b5a528c Florent Chuffart
		}
1032 6b5a528c Florent Chuffart
		# Get inputs
1033 780f632a Florent Chuffart
    if ("tf_input" %in% names(all_samples)) {
1034 780f632a Florent Chuffart
      sample_inputs_filename = all_samples$tf_input[all_samples$id==i]
1035 780f632a Florent Chuffart
    } else {
1036 780f632a Florent Chuffart
      sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1037 780f632a Florent Chuffart
    }
1038 780f632a Florent Chuffart
		sample$inputs = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)
1039 5bfac5a3 Florent Chuffart
		sample$total_reads = sum(sample$inputs[,4])
1040 6b5a528c Florent Chuffart
		if (!only_fetch) {
1041 6b5a528c Florent Chuffart
		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1042 6b5a528c Florent Chuffart
	  }
1043 6b5a528c Florent Chuffart
	  # Get TF outputs for Mnase_Seq samples
1044 5bfac5a3 Florent Chuffart
		if (sample$marker == "Mnase_Seq" & get_ouputs) {
1045 780f632a Florent Chuffart
      if ("tf_output" %in% names(all_samples)) {
1046 780f632a Florent Chuffart
        sample_outputs_filename = all_samples$tf_output[all_samples$id==i]
1047 780f632a Florent Chuffart
      } else {
1048 780f632a Florent Chuffart
        sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")                
1049 780f632a Florent Chuffart
      }
1050 780f632a Florent Chuffart
      sample$outputs = mread.table(sample_outputs_filename, header=TRUE, sep="\t")
1051 6b5a528c Florent Chuffart
			if (!only_fetch) {
1052 6b5a528c Florent Chuffart
	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
1053 6b5a528c Florent Chuffart
  		}
1054 6b5a528c Florent Chuffart
		}
1055 5bfac5a3 Florent Chuffart
		samples[[length(samples) + 1]] = sample
1056 6b5a528c Florent Chuffart
	}
1057 6b5a528c Florent Chuffart
  return(samples)
1058 6b5a528c Florent Chuffart
}
1059 6b5a528c Florent Chuffart
1060 6b5a528c Florent Chuffart
substract_region = function(# Substract to a list of regions an other list of regions that intersect it.
1061 6b5a528c Florent Chuffart
### This fucntion embed a recursive part. It occurs when a substracted region split an original region on two.
1062 55c1cdff Florent Chuffart
region1, ##<< Original regions.
1063 6b5a528c Florent Chuffart
region2 ##<< Regions to substract.
1064 6b5a528c Florent Chuffart
) {
1065 6b5a528c Florent Chuffart
  rec_substract_region = function(region1, region2) {
1066 6b5a528c Florent Chuffart
  non_inter_fuzzy = apply(t(1:length(region1[,1])), 2, function(i) {
1067 6b5a528c Florent Chuffart
    cur_fuzzy = region1[i,]
1068 6b5a528c Florent Chuffart
    inter_wp = region2[region2$lower_bound <= cur_fuzzy$upper_bound & region2$upper_bound >= cur_fuzzy$lower_bound,]
1069 6b5a528c Florent Chuffart
    if (length(inter_wp[,1]) > 0) {
1070 6b5a528c Florent Chuffart
      ret = c()
1071 6b5a528c Florent Chuffart
      for (j in 1:length(inter_wp[,1])) {
1072 6b5a528c Florent Chuffart
        cur_wp = inter_wp[j,]
1073 6b5a528c Florent Chuffart
        if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
1074 6b5a528c Florent Chuffart
          # remove cur_fuzzy
1075 6b5a528c Florent Chuffart
          ret = c()
1076 6b5a528c Florent Chuffart
          break
1077 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound <= cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
1078 55c1cdff Florent Chuffart
          # crop fuzzy
1079 55c1cdff Florent Chuffart
          cur_fuzzy$lower_bound = cur_wp$upper_bound + 1
1080 6b5a528c Florent Chuffart
          ret = cur_fuzzy
1081 6b5a528c Florent Chuffart
        } else if (cur_fuzzy$lower_bound < cur_wp$lower_bound & cur_fuzzy$upper_bound <= cur_wp$upper_bound) {
1082 55c1cdff Florent Chuffart
          # crop fuzzy
1083 55c1cdff Florent Chuffart
          cur_fuzzy$upper_bound = cur_wp$lower_bound - 1
1084 6b5a528c Florent Chuffart
          ret = cur_fuzzy
1085 6b5a528c Florent Chuffart
        } else if (cur_wp$lower_bound > cur_fuzzy$lower_bound & cur_wp$upper_bound < cur_fuzzy$upper_bound) {
1086 6b5a528c Florent Chuffart
          # split fuzzy
1087 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_1 = cur_fuzzy
1088 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_1$upper_bound = cur_wp$lower_bound - 1
1089 6b5a528c Florent Chuffart
          tmp_ret_fuzzy_2 = cur_fuzzy
1090 55c1cdff Florent Chuffart
          tmp_ret_fuzzy_2$lower_bound = cur_wp$upper_bound + 1
1091 6b5a528c Florent Chuffart
          ret = rec_substract_region(rbind(tmp_ret_fuzzy_1, tmp_ret_fuzzy_2), inter_wp)
1092 6b5a528c Florent Chuffart
          # print(ret)
1093 6b5a528c Florent Chuffart
          # ret = cur_fuzzy
1094 6b5a528c Florent Chuffart
          break
1095 6b5a528c Florent Chuffart
        } else {
1096 6b5a528c Florent Chuffart
          stop("WARNING NO ADAPTED CASE!")
1097 55c1cdff Florent Chuffart
        }
1098 6b5a528c Florent Chuffart
      }
1099 6b5a528c Florent Chuffart
      return(ret)
1100 6b5a528c Florent Chuffart
    } else {
1101 6b5a528c Florent Chuffart
      return(cur_fuzzy)
1102 6b5a528c Florent Chuffart
    }
1103 6b5a528c Florent Chuffart
  })
1104 6b5a528c Florent Chuffart
  }
1105 21b8928f Florent Chuffart
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
1106 6b5a528c Florent Chuffart
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
1107 b20637ed Florent Chuffart
  tmp_ulist = unlist(non_inter_fuzzy)
1108 b20637ed Florent Chuffart
  tmp_names = names(tmp_ulist)[1:4]
1109 55c1cdff Florent Chuffart
  non_inter_fuzzy = data.frame(matrix(tmp_ulist, ncol=4, byrow=TRUE), stringsAsFactors=FALSE)
1110 b20637ed Florent Chuffart
  names(non_inter_fuzzy) = tmp_names
1111 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.character(non_inter_fuzzy$chr)
1112 55c1cdff Florent Chuffart
  non_inter_fuzzy$chr = as.numeric(non_inter_fuzzy$chr)
1113 b20637ed Florent Chuffart
  non_inter_fuzzy$lower_bound = as.numeric(non_inter_fuzzy$lower_bound)
1114 b20637ed Florent Chuffart
  non_inter_fuzzy$upper_bound = as.numeric(non_inter_fuzzy$upper_bound)
1115 55c1cdff Florent Chuffart
  non_inter_fuzzy = non_inter_fuzzy[order(non_inter_fuzzy$lower_bound),]
1116 b20637ed Florent Chuffart
  return(non_inter_fuzzy)
1117 6b5a528c Florent Chuffart
}
1118 6b5a528c Florent Chuffart
1119 e5603c3f Florent Chuffart
union_regions = function(# Aggregate regions that intersect themselves.
1120 55c1cdff Florent Chuffart
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
1121 6b5a528c Florent Chuffart
regions ##<< The Regions to be aggregated
1122 6b5a528c Florent Chuffart
) {
1123 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {return(regions)}
1124 0303dbe8 Florent Chuffart
  if (nrow(regions) == 0) {return(regions)}
1125 6b5a528c Florent Chuffart
  old_length = length(regions[,1])
1126 6b5a528c Florent Chuffart
  new_length = 0
1127 55c1cdff Florent Chuffart
1128 6b5a528c Florent Chuffart
  while (old_length != new_length) {
1129 6b5a528c Florent Chuffart
    regions = regions[order(regions$lower_bound), ]
1130 55c1cdff Florent Chuffart
    regions$stop = !c(regions$lower_bound[-1] - regions$upper_bound[-length(regions$lower_bound)] <= 1, TRUE)
1131 6b5a528c Florent Chuffart
    vec_end_1 = which(regions$stop)
1132 6b5a528c Florent Chuffart
    if (length(vec_end_1) == 0) {
1133 55c1cdff Florent Chuffart
      vec_end_1 = c(length(regions$stop))
1134 55c1cdff Florent Chuffart
    }
1135 6b5a528c Florent Chuffart
    if (vec_end_1[length(vec_end_1)] != length(regions$stop)) {
1136 6b5a528c Florent Chuffart
      vec_end_1 = c(vec_end_1, length(regions$stop))
1137 6b5a528c Florent Chuffart
    }
1138 6b5a528c Florent Chuffart
    vec_beg_1 = c(1, vec_end_1[-length(vec_end_1)] + 1)
1139 6b5a528c Florent Chuffart
    union = apply(t(1:length(vec_beg_1)), 2, function(i) {
1140 6b5a528c Florent Chuffart
      chr = regions$chr[vec_beg_1[i]]
1141 6b5a528c Florent Chuffart
      lower_bound = min(regions$lower_bound[vec_beg_1[i]:vec_end_1[i]])
1142 6b5a528c Florent Chuffart
      upper_bound = max(regions$upper_bound[vec_beg_1[i]:vec_end_1[i]])
1143 4016229d Florent Chuffart
      cur_index = regions$cur_index[vec_beg_1[i]]
1144 4016229d Florent Chuffart
      data.frame(list(chr=chr, lower_bound=lower_bound, upper_bound=upper_bound, cur_index=cur_index))
1145 6b5a528c Florent Chuffart
      })
1146 59ad95ca Florent Chuffart
    union = collapse_regions(union)
1147 6b5a528c Florent Chuffart
    old_length = length(regions[,1])
1148 6b5a528c Florent Chuffart
    new_length = length(union[,1])
1149 6b5a528c Florent Chuffart
    regions = union
1150 6b5a528c Florent Chuffart
  }
1151 6b5a528c Florent Chuffart
  return(union)
1152 6b5a528c Florent Chuffart
}
1153 6b5a528c Florent Chuffart
1154 0303dbe8 Florent Chuffart
# remove_aligned_wp = function(# Remove wp nucs from common nucs list.
1155 0303dbe8 Florent Chuffart
# ### It is based on common wp nucs index on nucs and region.
1156 0303dbe8 Florent Chuffart
# strain_maps, ##<< Nuc maps.
1157 4016229d Florent Chuffart
# cur_index, ##<< The region of interest index.
1158 0303dbe8 Florent Chuffart
# tmp_common_nucs, ##<< the list of wp nucs.
1159 0303dbe8 Florent Chuffart
# strain##<< The strain to consider.
1160 0303dbe8 Florent Chuffart
# ){
1161 0303dbe8 Florent Chuffart
#   fuzzy_nucs = strain_maps[[strain]]
1162 4016229d Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[fuzzy_nucs$cur_index == cur_index,]
1163 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[order(fuzzy_nucs$index_nuc),]
1164 0303dbe8 Florent Chuffart
#   if (length(fuzzy_nucs[,1]) == 0) {return(fuzzy_nucs)}
1165 0303dbe8 Florent Chuffart
#   if (sum(fuzzy_nucs$index_nuc == min(fuzzy_nucs$index_nuc):max(fuzzy_nucs$index_nuc)) != max(fuzzy_nucs$index_nuc)) {"Warning in index!"}
1166 0303dbe8 Florent Chuffart
#   anti_index_1 = tmp_common_nucs[[paste("index_nuc", strain, sep="_")]]
1167 0303dbe8 Florent Chuffart
#   fuzzy_nucs = fuzzy_nucs[-anti_index_1,]
1168 0303dbe8 Florent Chuffart
#   return(fuzzy_nucs)
1169 0303dbe8 Florent Chuffart
# }
1170 6b5a528c Florent Chuffart
1171 6b5a528c Florent Chuffart
translate_regions = function(# Translate a list of regions from a strain ref to another.
1172 e5603c3f Florent Chuffart
### This function is an elaborated call to translate_cur.
1173 55c1cdff Florent Chuffart
regions, ##<< Regions to be translated.
1174 6b5a528c Florent Chuffart
combi, ##<< Combination of strains.
1175 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
1176 1d833b97 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1177 6b5a528c Florent Chuffart
roi ##<< The region of interest.
1178 6b5a528c Florent Chuffart
) {
1179 6b5a528c Florent Chuffart
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
1180 6b5a528c Florent Chuffart
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
1181 4016229d Florent Chuffart
    big_cur =  roi
1182 4016229d Florent Chuffart
    trs_tmp_regions_ref2 = translate_cur(tmp_regions_ref2, combi[1], config = config, big_cur = big_cur)
1183 5bfac5a3 Florent Chuffart
    if (is.null(trs_tmp_regions_ref2)) {
1184 5bfac5a3 Florent Chuffart
      return(NULL)
1185 5bfac5a3 Florent Chuffart
    } else {
1186 4016229d Florent Chuffart
      return(data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), cur_index=cur_index)))
1187 5bfac5a3 Florent Chuffart
    }
1188 5bfac5a3 Florent Chuffart
  })
1189 5bfac5a3 Florent Chuffart
1190 59ad95ca Florent Chuffart
  return(collapse_regions(tr_regions))
1191 59ad95ca Florent Chuffart
}
1192 59ad95ca Florent Chuffart
1193 59ad95ca Florent Chuffart
collapse_regions = function(# reformat an "apply  manipulated" list of regions
1194 59ad95ca Florent Chuffart
### Utils to reformat an "apply  manipulated" list of regions
1195 59ad95ca Florent Chuffart
regions ##< a list of regions
1196 59ad95ca Florent Chuffart
) {
1197 5bfac5a3 Florent Chuffart
  if (is.null(regions)) {
1198 5bfac5a3 Florent Chuffart
    return(NULL)
1199 5bfac5a3 Florent Chuffart
  } else {
1200 5bfac5a3 Florent Chuffart
    regions = do.call(rbind, regions)
1201 5bfac5a3 Florent Chuffart
    regions$chr = as.character(regions$chr)
1202 5bfac5a3 Florent Chuffart
    regions$chr = as.numeric(regions$chr)
1203 5bfac5a3 Florent Chuffart
    regions$lower_bound = as.numeric(regions$lower_bound)
1204 5bfac5a3 Florent Chuffart
    regions$upper_bound = as.numeric(regions$upper_bound)
1205 5bfac5a3 Florent Chuffart
    regions = regions[order(regions$lower_bound),]
1206 5bfac5a3 Florent Chuffart
    return(regions)
1207 5bfac5a3 Florent Chuffart
  }
1208 6b5a528c Florent Chuffart
}
1209 6b5a528c Florent Chuffart
1210 6b5a528c Florent Chuffart
1211 6b5a528c Florent Chuffart
crop_fuzzy = function(# Crop bound of regions according to region of interest bound
1212 4016229d Florent Chuffart
### The fucntion is no more necessary since we remove "big_cur" bug in translate_cur function.
1213 6b5a528c Florent Chuffart
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
1214 6b5a528c Florent Chuffart
roi, ##<< The region of interest.
1215 1d833b97 Florent Chuffart
strain, ##<< The strain to consider.
1216 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1217 6b5a528c Florent Chuffart
) {
1218 4016229d Florent Chuffart
  tr_roi = translate_cur(roi, strain, config = config)
1219 6b5a528c Florent Chuffart
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
1220 6b5a528c Florent Chuffart
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
1221 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
1222 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,]$lower_bound = tr_roi_begin
1223 6b5a528c Florent Chuffart
  }
1224 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,1]) > 0) {
1225 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound < tr_roi_begin,]$upper_bound = tr_roi_begin
1226 6b5a528c Florent Chuffart
  }
1227 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,1]) > 0) {
1228 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound > tr_roi_end,]$lower_bound = tr_roi_end
1229 6b5a528c Florent Chuffart
  }
1230 6b5a528c Florent Chuffart
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,1]) > 0) {
1231 6b5a528c Florent Chuffart
    tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound > tr_roi_end,]$upper_bound = tr_roi_end
1232 6b5a528c Florent Chuffart
  }
1233 6b5a528c Florent Chuffart
  tmp_fuzzy_nucs = tmp_fuzzy_nucs[tmp_fuzzy_nucs$upper_bound != tmp_fuzzy_nucs$lower_bound,]
1234 6b5a528c Florent Chuffart
  return(tmp_fuzzy_nucs)
1235 6b5a528c Florent Chuffart
}
1236 6b5a528c Florent Chuffart
1237 55c1cdff Florent Chuffart
get_all_reads = function(# Retrieve Reads
1238 6b5a528c Florent Chuffart
### Retrieve reads for a given marker, combi, form.
1239 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1240 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
1241 cc54c799 Florent Chuffart
form="wp", ##<< The nuc form to considere.
1242 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1243 6b5a528c Florent Chuffart
) {
1244 6b5a528c Florent Chuffart
	all_reads = NULL
1245 6b5a528c Florent Chuffart
  for (manip in c("Mnase_Seq", marker)) {
1246 4016229d Florent Chuffart
    if (form == "unr") {
1247 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
1248 b26ac9e7 Florent Chuffart
  		tmp_res = mread.table(file=out_filename, header=TRUE)
1249 6b5a528c Florent Chuffart
			tmp_res = tmp_res[tmp_res[,3] - tmp_res[,2] > 75,]
1250 6b5a528c Florent Chuffart
      tmp_res$form = form
1251 6b5a528c Florent Chuffart
    } else if (form == "wp") {
1252 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1253 b26ac9e7 Florent Chuffart
  		tmp_res = mread.table(file=out_filename, header=TRUE)
1254 6b5a528c Florent Chuffart
      tmp_res$form = form
1255 4016229d Florent Chuffart
    } else if (form == "wpunr") {
1256 cc54c799 Florent Chuffart
		 	out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_wp_and_nbreads.tab",sep="")
1257 b26ac9e7 Florent Chuffart
  		tmp_res = mread.table(file=out_filename, header=TRUE)
1258 6b5a528c Florent Chuffart
      tmp_res$form = "wp"
1259 4016229d Florent Chuffart
		  out_filename = paste(config$RESULTS_DIR, "/",combi[1],"_",combi[2],"_",manip,"_unr_and_nbreads.tab",sep="")
1260 b26ac9e7 Florent Chuffart
  		tmp_res2 = mread.table(file=out_filename, header=TRUE)
1261 6b5a528c Florent Chuffart
			tmp_res2 = tmp_res2[tmp_res2[,3] - tmp_res2[,2] > 75,]
1262 4016229d Florent Chuffart
      tmp_res2$form = "unr"
1263 6b5a528c Florent Chuffart
      tmp_res = rbind(tmp_res, tmp_res2)
1264 6b5a528c Florent Chuffart
    }
1265 6b5a528c Florent Chuffart
		if (is.null(all_reads)) {
1266 6b5a528c Florent Chuffart
			all_reads = tmp_res[,c(1:9,length(tmp_res))]
1267 55c1cdff Florent Chuffart
		}
1268 6b5a528c Florent Chuffart
		tmp_res = tmp_res[,-c(1:9,length(tmp_res))]
1269 6b5a528c Florent Chuffart
		all_reads = cbind(all_reads, tmp_res)
1270 6b5a528c Florent Chuffart
  }
1271 5bfac5a3 Florent Chuffart
  return(all_reads)
1272 6b5a528c Florent Chuffart
}
1273 6b5a528c Florent Chuffart
1274 e5603c3f Florent Chuffart
get_design = function(# Build the design for DESeq
1275 6b5a528c Florent Chuffart
### This function build the design according sample properties.
1276 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1277 6b5a528c Florent Chuffart
combi, ##<< The starin combination to considere.
1278 6b5a528c Florent Chuffart
all_samples ##<< Global list of samples.
1279 6b5a528c Florent Chuffart
) {
1280 6b5a528c Florent Chuffart
  off1 = 0
1281 6b5a528c Florent Chuffart
  off2 = 0
1282 6b5a528c Florent Chuffart
	manips = c("Mnase_Seq", marker)
1283 6b5a528c Florent Chuffart
	design_rownames = c()
1284 6b5a528c Florent Chuffart
	design_manip = c()
1285 6b5a528c Florent Chuffart
	design_strain = c()
1286 5bfac5a3 Florent Chuffart
  off2index = function(off) {
1287 55c1cdff Florent Chuffart
  	switch(toString(off),
1288 6b5a528c Florent Chuffart
  		"1"=c(0,1,1),
1289 6b5a528c Florent Chuffart
  	  "2"=c(1,0,1),
1290 55c1cdff Florent Chuffart
    	"3"=c(1,1,0),
1291 6b5a528c Florent Chuffart
  		c(1,1,1)
1292 5bfac5a3 Florent Chuffart
  		)
1293 6b5a528c Florent Chuffart
  }
1294 6b5a528c Florent Chuffart
	for (manip in manips) {
1295 6b5a528c Florent Chuffart
		tmp_samples = all_samples[ ((all_samples$strain == combi[1] | all_samples$strain == combi[2]) &  all_samples$marker == manip), ]
1296 6b5a528c Florent Chuffart
		tmp_samples = tmp_samples[order(tmp_samples$strain), ]
1297 6b5a528c Florent Chuffart
		if (manip == "H3K4me1" & (off1 != 0 & off2 ==0 )) {
1298 6b5a528c Florent Chuffart
			tmp_samples = tmp_samples[c(off2index(off1), c(1,1)) == 1,]
1299 6b5a528c Florent Chuffart
		} else {
1300 6b5a528c Florent Chuffart
			if (manip != "Mnase_Seq" & (off1 != 0 | off2 !=0)) {
1301 6b5a528c Florent Chuffart
				tmp_samples = tmp_samples[c(off2index(off1), off2index(off2)) == 1,]
1302 6b5a528c Florent Chuffart
			}
1303 6b5a528c Florent Chuffart
		}
1304 5bfac5a3 Florent Chuffart
		design_manip = c(design_manip, rep(manip, length(tmp_samples$id)))
1305 5bfac5a3 Florent Chuffart
		for (strain in combi) {
1306 6b5a528c Florent Chuffart
			cols = apply(t(tmp_samples[ (tmp_samples$strain == strain &  tmp_samples$marker == manip), ]$id), 2, function(i){paste(strain, manip, i, sep="_")})
1307 6b5a528c Florent Chuffart
			design_strain = c(design_strain, rep(strain, length(cols)))
1308 6b5a528c Florent Chuffart
			design_rownames = c(design_rownames, cols)
1309 6b5a528c Florent Chuffart
		}
1310 6b5a528c Florent Chuffart
	}
1311 6b5a528c Florent Chuffart
	snep_design = data.frame( row.names=design_rownames, manip=design_manip, strain=design_strain)
1312 5bfac5a3 Florent Chuffart
	return(snep_design)
1313 6b5a528c Florent Chuffart
}
1314 6b5a528c Florent Chuffart
1315 6b5a528c Florent Chuffart
plot_dist_samples = function(# Plot the distribution of reads.
1316 e5603c3f Florent Chuffart
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
1317 6b5a528c Florent Chuffart
strain, ##<< The strain to considere.
1318 6b5a528c Florent Chuffart
marker, ##<< The marker to considere.
1319 6b5a528c Florent Chuffart
res, ##<< Data
1320 6b5a528c Florent Chuffart
all_samples, ##<< Global list of samples.
1321 6b5a528c Florent Chuffart
NEWPLOT = TRUE ##<< If FALSE the curve will be add to the current plot.
1322 5bfac5a3 Florent Chuffart
) {
1323 6b5a528c Florent Chuffart
	cols = apply(t(all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id), 2, function(i){paste(strain, marker, i, sep="_")})
1324 6b5a528c Florent Chuffart
	snepCountTable = res[,cols]
1325 6b5a528c Florent Chuffart
	snepDesign = data.frame(
1326 6b5a528c Florent Chuffart
		row.names = cols,
1327 6b5a528c Florent Chuffart
		manip = rep(marker, length(cols)),
1328 6b5a528c Florent Chuffart
		strain = rep(strain, length(cols))
1329 6b5a528c Florent Chuffart
		)
1330 6b5a528c Florent Chuffart
	cdsFull = newCountDataSet(snepCountTable, snepDesign)
1331 6b5a528c Florent Chuffart
	sizeFactors = estimateSizeFactors(cdsFull)
1332 6b5a528c Florent Chuffart
	# print(sizeFactors[[1]])
1333 6b5a528c Florent Chuffart
	sample_ids = all_samples[ (all_samples$strain == strain &  all_samples$marker == marker), ]$id
1334 6b5a528c Florent Chuffart
	if (NEWPLOT) {
1335 5bfac5a3 Florent Chuffart
		plot(density(res[,paste(strain, marker, sample_ids[1], sep="_")] / sizeFactors[[1]][1]), col=0, main=paste(strain, marker))
1336 6b5a528c Florent Chuffart
		NEWPLOT = FALSE
1337 6b5a528c Florent Chuffart
	}
1338 6b5a528c Florent Chuffart
	for (it in 1:length(sample_ids)) {
1339 6b5a528c Florent Chuffart
		sample_id = sample_ids[it]
1340 6b5a528c Florent Chuffart
		lines(density(res[,paste(strain, marker, sample_id, sep="_")] / sizeFactors[[1]][it]), col = it + 1, lty = it)
1341 6b5a528c Florent Chuffart
	}
1342 7164d3ac Florent Chuffart
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1343 6b5a528c Florent Chuffart
}
1344 6b5a528c Florent Chuffart
1345 6b5a528c Florent Chuffart
1346 6b5a528c Florent Chuffart
1347 6b5a528c Florent Chuffart
1348 780f632a Florent Chuffart
analyse_count_table = structure(function(# Compute the list of SNEPs for a given set of marker, strain combination and nuc form.
1349 55c1cdff Florent Chuffart
### This function uses
1350 6b5a528c Florent Chuffart
marker, ##<< The marker involved.
1351 6b5a528c Florent Chuffart
combi, ##<< The strain combination involved.
1352 55c1cdff Florent Chuffart
form, ##<< the nuc form involved.
1353 cc54c799 Florent Chuffart
all_samples, ##<< Global list of samples.
1354 e5603c3f Florent Chuffart
FDR = 0.0001, ## the specific False Discover Rate
1355 cc54c799 Florent Chuffart
config=NULL ##<< GLOBAL config variable
1356 6b5a528c Florent Chuffart
) {
1357 6b5a528c Florent Chuffart
  # PRETREAT
1358 4016229d Florent Chuffart
  snep_design = get_design(marker, combi, all_samples)
1359 780f632a Florent Chuffart
  mnase_design = snep_design[snep_design$manip == "Mnase_Seq", ]
1360 780f632a Florent Chuffart
  snep_reads = get_all_reads(marker, combi, form, config=config)
1361 780f632a Florent Chuffart
	snep_count_table = snep_reads[, rownames(snep_design)]
1362 780f632a Florent Chuffart
  mnase_count_table = snep_reads[, rownames(mnase_design)]
1363 780f632a Florent Chuffart
  # RUN ANALYSE FOPR SNEPS
1364 780f632a Florent Chuffart
	cdsFull = newCountDataSet(snep_count_table, snep_design)
1365 780f632a Florent Chuffart
	cdsFull1 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1366 780f632a Florent Chuffart
	fit1 = fitNbinomGLMs(cdsFull1, count ~ manip * strain)
1367 780f632a Florent Chuffart
	cdsFull0 = estimateDispersions(estimateSizeFactors(cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1368 780f632a Florent Chuffart
	fit0 = fitNbinomGLMs(cdsFull0, count ~ manip + strain)
1369 780f632a Florent Chuffart
	pvalsGLM = nbinomGLMTest( fit1, fit0 )
1370 780f632a Florent Chuffart
  # RUN ANALYSE FOR MNASE
1371 780f632a Florent Chuffart
  mnase_cdsFull = newCountDataSet(mnase_count_table, mnase_design$strain)
1372 780f632a Florent Chuffart
  mnase_cdsFull1 = estimateDispersions(estimateSizeFactors(mnase_cdsFull), fitType="local", method="pooled", sharingMode="maximum")
1373 780f632a Florent Chuffart
  res_mnase = nbinomTest( mnase_cdsFull1, combi[1], combi[2])
1374 780f632a Florent Chuffart
  # GLOBAL RESULTS
1375 780f632a Florent Chuffart
  #   SNEPS
1376 6b5a528c Florent Chuffart
  k = names(fit1)
1377 780f632a Florent Chuffart
  snep_reads[[k[2]]] = signif(fit1[[k[2]]], 5)
1378 780f632a Florent Chuffart
  snep_reads[[k[3]]] = signif(fit1[[k[3]]], 5)
1379 780f632a Florent Chuffart
  snep_reads[[k[4]]] = signif(fit1[[k[4]]], 5)
1380 780f632a Florent Chuffart
	snep_reads$pvalsGLM = signif(pvalsGLM, 5)
1381 4016229d Florent Chuffart
  # print(snep_design)
1382 780f632a Florent Chuffart
	thres = FDR(snep_reads$pvalsGLM, FDR)
1383 780f632a Florent Chuffart
	snep_reads$snep_index = snep_reads$pvalsGLM < thres
1384 780f632a Florent Chuffart
  print(paste(sum(snep_reads$snep_index), " SNEPs found for ", length(snep_reads[,1])," nucs and ", FDR*100,"% of FDR.", sep = ""))
1385 780f632a Florent Chuffart
  #   MNASE
1386 780f632a Florent Chuffart
  snep_reads[["mnase_l2fc"]] = signif(res_mnase$log2FoldChange, 5)
1387 780f632a Florent Chuffart
  snep_reads[["mnase_l2fc_pval"]] = signif(res_mnase$pval, 5)
1388 780f632a Florent Chuffart
  # write results
1389 780f632a Florent Chuffart
	snep_filename = paste(config$RESULTS_DIR, "/" ,combi[1],"_",combi[2],"_",marker,"_", form, "_snep.tab",sep="")
1390 780f632a Florent Chuffart
	write.table(snep_reads, file=snep_filename, row.names=FALSE, quote=FALSE)      
1391 780f632a Florent Chuffart
  return(snep_reads)
1392 5bfac5a3 Florent Chuffart
  },  ex=function(){
1393 6b5a528c Florent Chuffart
    marker = "H3K4me1"
1394 55c1cdff Florent Chuffart
    combi = c("BY", "YJM")
1395 4016229d Florent Chuffart
    form = "wpunr" # "wp" | "unr" | "wpunr"
1396 780f632a Florent Chuffart
    # foo = analyse_count_table(marker, combi, form)
1397 780f632a Florent Chuffart
    # foo = analyse_count_table("H4K12ac", c("BY", "RM"), "wp")
1398 6b5a528c Florent Chuffart
})
1399 6b5a528c Florent Chuffart
1400 6b5a528c Florent Chuffart
1401 6b5a528c Florent Chuffart
1402 6b5a528c Florent Chuffart
1403 6b5a528c Florent Chuffart
1404 6b5a528c Florent Chuffart
1405 6b5a528c Florent Chuffart
1406 6b5a528c Florent Chuffart
1407 6b5a528c Florent Chuffart
1408 6b5a528c Florent Chuffart
1409 6b5a528c Florent Chuffart
1410 6b5a528c Florent Chuffart
1411 6b5a528c Florent Chuffart
1412 6b5a528c Florent Chuffart
1413 6b5a528c Florent Chuffart
1414 6b5a528c Florent Chuffart
1415 6b5a528c Florent Chuffart
1416 6b5a528c Florent Chuffart
1417 6b5a528c Florent Chuffart
1418 6b5a528c Florent Chuffart
1419 6b5a528c Florent Chuffart
1420 6b5a528c Florent Chuffart
1421 6b5a528c Florent Chuffart
1422 6b5a528c Florent Chuffart
1423 1d833b97 Florent Chuffart
ROM2ARAB = function(# Roman to Arabic pair list.
1424 e5603c3f Florent Chuffart
### Utility to convert Roman numbers into Arabic numbers
1425 1d833b97 Florent Chuffart
){list(
1426 1d833b97 Florent Chuffart
  "I" = 1,
1427 1d833b97 Florent Chuffart
  "II" = 2,
1428 1d833b97 Florent Chuffart
  "III" = 3,
1429 1d833b97 Florent Chuffart
  "IV" = 4,
1430 1d833b97 Florent Chuffart
  "V" = 5,
1431 1d833b97 Florent Chuffart
  "VI" = 6,
1432 1d833b97 Florent Chuffart
  "VII" = 7,
1433 1d833b97 Florent Chuffart
  "VIII" = 8,
1434 1d833b97 Florent Chuffart
  "IX" = 9,
1435 1d833b97 Florent Chuffart
  "X" = 10,
1436 1d833b97 Florent Chuffart
  "XI" = 11,
1437 1d833b97 Florent Chuffart
  "XII" = 12,
1438 1d833b97 Florent Chuffart
  "XIII" = 13,
1439 1d833b97 Florent Chuffart
  "XIV" = 14,
1440 1d833b97 Florent Chuffart
  "XV" = 15,
1441 1d833b97 Florent Chuffart
  "XVI" = 16,
1442 1d833b97 Florent Chuffart
  "XVII" = 17,
1443 1d833b97 Florent Chuffart
  "XVIII" = 18,
1444 1d833b97 Florent Chuffart
  "XIX" = 19,
1445 1d833b97 Florent Chuffart
  "XX" = 20
1446 1d833b97 Florent Chuffart
)}
1447 1d833b97 Florent Chuffart
1448 1d833b97 Florent Chuffart
switch_pairlist = structure(function(# Switch a pairlist
1449 1d833b97 Florent Chuffart
### Take a pairlist key:value and return the switched pairlist value:key.
1450 55c1cdff Florent Chuffart
l ##<< The pairlist to switch.
1451 1d833b97 Florent Chuffart
) {
1452 1d833b97 Florent Chuffart
	ret = list()
1453 1d833b97 Florent Chuffart
	for (name in names(l)) {
1454 1d833b97 Florent Chuffart
		ret[[as.character(l[[name]])]] = name
1455 1d833b97 Florent Chuffart
	}
1456 1d833b97 Florent Chuffart
	ret
1457 1d833b97 Florent Chuffart
### The switched pairlist.
1458 1d833b97 Florent Chuffart
}, ex=function(){
1459 1d833b97 Florent Chuffart
	l = list(key1 = "value1", key2 = "value2")
1460 1d833b97 Florent Chuffart
	print(switch_pairlist(l))
1461 1d833b97 Florent Chuffart
})
1462 1d833b97 Florent Chuffart
1463 1d833b97 Florent Chuffart
ARAB2ROM = function(# Arabic to Roman pair list.
1464 e5603c3f Florent Chuffart
### Utility to convert Arabic numbers to Roman numbers
1465 1d833b97 Florent Chuffart
){switch_pairlist(ROM2ARAB())}
1466 1d833b97 Florent Chuffart
1467 1d833b97 Florent Chuffart
1468 1d833b97 Florent Chuffart
1469 1d833b97 Florent Chuffart
1470 5bfac5a3 Florent Chuffart
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1471 e5603c3f Florent Chuffart
### This fonction allows to access to a specific part of the c2c file.
1472 5bfac5a3 Florent Chuffart
strain1, ##<< the key strain
1473 5bfac5a3 Florent Chuffart
strain2, ##<< the target strain
1474 e5603c3f Florent Chuffart
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1475 e5603c3f Florent Chuffart
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
1476 e5603c3f Florent Chuffart
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
1477 5bfac5a3 Florent Chuffart
config=NULL##<<  GLOBAL config variable
1478 1d833b97 Florent Chuffart
) {
1479 1d833b97 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1480 1d833b97 Florent Chuffart
	# Launch c2c file
1481 1d833b97 Florent Chuffart
	if (reverse) {
1482 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]
1483 1d833b97 Florent Chuffart
	} else {
1484 5bfac5a3 Florent Chuffart
		c2c_filename = config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]
1485 1d833b97 Florent Chuffart
	}
1486 b26ac9e7 Florent Chuffart
	c2c = mread.table(c2c_filename, stringsAsFactors=FALSE)
1487 5bfac5a3 Florent Chuffart
  # Filtering unagapped
1488 1d833b97 Florent Chuffart
  c2c = c2c[c2c$V6=="-",]
1489 1d833b97 Florent Chuffart
	# Reverse
1490 1d833b97 Florent Chuffart
	if (reverse) {
1491 1d833b97 Florent Chuffart
		tmp_col = c2c$V1
1492 1d833b97 Florent Chuffart
		c2c$V1 = c2c$V7
1493 1d833b97 Florent Chuffart
		c2c$V7 = tmp_col
1494 1d833b97 Florent Chuffart
		tmp_col = c2c$V2
1495 1d833b97 Florent Chuffart
		c2c$V2 = c2c$V9
1496 1d833b97 Florent Chuffart
		c2c$V9 = tmp_col
1497 1d833b97 Florent Chuffart
		tmp_col = c2c$V3
1498 1d833b97 Florent Chuffart
		c2c$V3 = c2c$V10
1499 1d833b97 Florent Chuffart
		c2c$V10 = tmp_col
1500 1d833b97 Florent Chuffart
	}
1501 5bfac5a3 Florent Chuffart
  if (!is.null(chr)) {
1502 5bfac5a3 Florent Chuffart
  	if (strain1 == "BY") {
1503 5bfac5a3 Florent Chuffart
  		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1504 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "RM") {
1505 5bfac5a3 Florent Chuffart
  	  chro_1 = paste("supercontig_1.",chr,sep="")
1506 5bfac5a3 Florent Chuffart
  	} else if (strain1 == "YJM") {
1507 5bfac5a3 Florent Chuffart
  	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1508 5bfac5a3 Florent Chuffart
  	}
1509 5bfac5a3 Florent Chuffart
  	c2c = c2c[c2c$V1 == chro_1,]
1510 5bfac5a3 Florent Chuffart
    if (!is.null(lower_bound)) {
1511 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V3 < lower_bound & c2c$V2 < c2c$V3,c("V2", "V3") ] = lower_bound}
1512 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V2 < lower_bound & c2c$V3 < c2c$V2,c("V2", "V3") ] = lower_bound}      
1513 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1514 5bfac5a3 Florent Chuffart
    }
1515 5bfac5a3 Florent Chuffart
    if (!is.null(upper_bound)) {
1516 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, 1] > 0)) {c2c[c2c$V2 > upper_bound & c2c$V2 < c2c$V3, c("V2", "V3")] = upper_bound}
1517 5bfac5a3 Florent Chuffart
      if (length(c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, 1] > 0)) {c2c[c2c$V3 > upper_bound & c2c$V3 < c2c$V2, c("V2", "V3")] = upper_bound}      
1518 5bfac5a3 Florent Chuffart
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1519 5bfac5a3 Florent Chuffart
    }
1520 5bfac5a3 Florent Chuffart
  }
1521 5bfac5a3 Florent Chuffart
  return(c2c)
1522 5bfac5a3 Florent Chuffart
# It retruns the appropriate c2c file part.
1523 5bfac5a3 Florent Chuffart
}
1524 5bfac5a3 Florent Chuffart
1525 4016229d Florent Chuffart
translate_cur = structure(function(# Translate coords of a genome region.
1526 5bfac5a3 Florent Chuffart
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1527 5bfac5a3 Florent Chuffart
roi, ##<< Original genome region of interest.
1528 5bfac5a3 Florent Chuffart
strain2, ##<< The strain in wich you want the genome region of interest.
1529 5bfac5a3 Florent Chuffart
config=NULL, ##<< GLOBAL config variable
1530 4016229d Florent Chuffart
big_cur=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1531 5bfac5a3 Florent Chuffart
) {
1532 d973538c Florent Chuffart
	strain1 = roi$strain_ref  
1533 d973538c Florent Chuffart
  # Do something or nothing?
1534 d973538c Florent Chuffart
  if (is.null(config$NEUTRAL_TRANSLATE_CUR)) {
1535 d973538c Florent Chuffart
    config$NEUTRAL_TRANSLATE_CUR = FALSE
1536 d973538c Florent Chuffart
  }
1537 d973538c Florent Chuffart
	if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) {
1538 d973538c Florent Chuffart
    roi$strain_ref = strain2
1539 d973538c Florent Chuffart
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin)
1540 5bfac5a3 Florent Chuffart
		return(roi)
1541 5bfac5a3 Florent Chuffart
	}
1542 d973538c Florent Chuffart
  
1543 5bfac5a3 Florent Chuffart
	# Extract c2c file
1544 4016229d Florent Chuffart
	if (!is.null(big_cur)) {
1545 4016229d Florent Chuffart
  	# Dealing with big_cur
1546 4016229d Florent Chuffart
		if (roi$strain_ref != big_cur$strain_ref) {
1547 4016229d Florent Chuffart
      big_cur = translate_cur(big_cur, roi$strain_ref, config=config)
1548 62d69d53 Florent Chuffart
    }
1549 4016229d Florent Chuffart
    if (big_cur$end < big_cur$begin) {
1550 4016229d Florent Chuffart
      tmp_var = big_cur$begin
1551 4016229d Florent Chuffart
      big_cur$begin = big_cur$end
1552 4016229d Florent Chuffart
      big_cur$end = tmp_var
1553 4016229d Florent Chuffart
      big_cur$length = big_cur$end - big_cur$begin + 1
1554 55c1cdff Florent Chuffart
    }
1555 4016229d Florent Chuffart
    if (big_cur$chr!=roi$chr | roi$end > big_cur$end | roi$end < big_cur$begin | roi$begin > big_cur$end | roi$begin < big_cur$begin) {
1556 4016229d Florent Chuffart
      print("WARNING! Trying to translate a roi not included in a big_cur.")
1557 ded736c8 Florent Chuffart
      return(NULL)
1558 ded736c8 Florent Chuffart
    }
1559 4016229d Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, big_cur$chr, big_cur$begin, big_cur$end, config=config)
1560 5bfac5a3 Florent Chuffart
  } else {
1561 4016229d Florent Chuffart
    # No big_cur
1562 5bfac5a3 Florent Chuffart
  	c2c = c2c_extraction(strain1, strain2, roi$chr, config=config)    
1563 5bfac5a3 Florent Chuffart
  }
1564 5bfac5a3 Florent Chuffart
  
1565 1d833b97 Florent Chuffart
  #	Convert initial roi$chr into c2c format
1566 5bfac5a3 Florent Chuffart
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1567 1d833b97 Florent Chuffart
	begin_1 = roi$begin
1568 1d833b97 Florent Chuffart
  end_1 = roi$end
1569 5bfac5a3 Florent Chuffart
  if (reverse) {
1570 5bfac5a3 Florent Chuffart
  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1571 5bfac5a3 Florent Chuffart
    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1572 1d833b97 Florent Chuffart
	} else {
1573 5bfac5a3 Florent Chuffart
		tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1574 5bfac5a3 Florent Chuffart
	  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1575 1d833b97 Florent Chuffart
	}
1576 5bfac5a3 Florent Chuffart
1577 5bfac5a3 Florent Chuffart
	# Never happend conditions ...
1578 1d833b97 Florent Chuffart
	{
1579 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) == 0) {
1580 1d833b97 Florent Chuffart
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1581 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1582 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2>=begin_1,]
1583 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V2)
1584 54cc8283 Florent Chuffart
      } else {
1585 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3>=begin_1,]
1586 f56c14b2 Florent Chuffart
  			begin_1 = min(tmp_c2c$V3)
1587 54cc8283 Florent Chuffart
      }
1588 1d833b97 Florent Chuffart
			if (reverse) {
1589 5bfac5a3 Florent Chuffart
		  	tmptransfostart = c2c[(c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1),]
1590 1d833b97 Florent Chuffart
			} else {
1591 5bfac5a3 Florent Chuffart
				tmptransfostart = c2c[c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1592 1d833b97 Florent Chuffart
			}
1593 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) == 0) {
1594 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1595 1d833b97 Florent Chuffart
					return(NULL)
1596 4016229d Florent Chuffart
					tmptransfostart = c2c[c2c$V3>=big_cur$begin & c2c$V2<=big_cur$begin,]
1597 55c1cdff Florent Chuffart
				} else {
1598 1d833b97 Florent Chuffart
					print(tmptransfostart)
1599 1d833b97 Florent Chuffart
					print(tmptransfostop)
1600 5bfac5a3 Florent Chuffart
					stop("Never happend condition 1.")
1601 1d833b97 Florent Chuffart
				}
1602 1d833b97 Florent Chuffart
			}
1603 55c1cdff Florent Chuffart
		}
1604 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) == 0) {
1605 1d833b97 Florent Chuffart
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1606 8c87ee64 Florent Chuffart
      if (sum(c2c$V3 >= c2c$V2) != 0) {
1607 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V3<=end_1,]
1608 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V3)
1609 54cc8283 Florent Chuffart
      } else {
1610 5bfac5a3 Florent Chuffart
  			tmp_c2c = c2c[c2c$V2<=end_1,]
1611 f56c14b2 Florent Chuffart
  			end_1 = max(tmp_c2c$V2)
1612 54cc8283 Florent Chuffart
      }
1613 1d833b97 Florent Chuffart
			if (reverse) {
1614 5bfac5a3 Florent Chuffart
		    tmptransfostop = c2c[(c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1),]
1615 1d833b97 Florent Chuffart
			} else {
1616 5bfac5a3 Florent Chuffart
			  tmptransfostop = c2c[c2c$V3>=end_1 & c2c$V2<=end_1,]
1617 1d833b97 Florent Chuffart
			}
1618 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) == 0) {
1619 4016229d Florent Chuffart
				if (!is.null(big_cur)) {
1620 1d833b97 Florent Chuffart
					return(NULL)
1621 4016229d Florent Chuffart
				  tmptransfostop = c2c[c2c$V3>=big_cur$end & c2c$V2<=big_cur$end,]
1622 1d833b97 Florent Chuffart
				} else {
1623 1d833b97 Florent Chuffart
					print(tmptransfostart)
1624 1d833b97 Florent Chuffart
					print(tmptransfostop)
1625 1d833b97 Florent Chuffart
					stop("Never happend condition 2.")
1626 55c1cdff Florent Chuffart
				}
1627 55c1cdff Florent Chuffart
			}
1628 1d833b97 Florent Chuffart
		}
1629 1d833b97 Florent Chuffart
		if (length(tmptransfostart$V8) != 1) {
1630 1d833b97 Florent Chuffart
			# print("many start")
1631 5bfac5a3 Florent Chuffart
			tmptransfostart = tmptransfostart[tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1632 1d833b97 Florent Chuffart
			if (length(tmptransfostart$V8) != 1) {
1633 1d833b97 Florent Chuffart
				print(tmptransfostart)
1634 1d833b97 Florent Chuffart
				print(tmptransfostop)
1635 1d833b97 Florent Chuffart
  			stop("Never happend condition 3.")
1636 1d833b97 Florent Chuffart
			}
1637 1d833b97 Florent Chuffart
		}
1638 1d833b97 Florent Chuffart
		if (length(tmptransfostop$V8) != 1) {
1639 1d833b97 Florent Chuffart
			# print("many stop")
1640 5bfac5a3 Florent Chuffart
		  tmptransfostop = tmptransfostop[tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1641 1d833b97 Florent Chuffart
			if (length(tmptransfostop$V8) != 1) {
1642 1d833b97 Florent Chuffart
				print(tmptransfostart)
1643 1d833b97 Florent Chuffart
				print(tmptransfostop)
1644 1d833b97 Florent Chuffart
  			stop("Never happend condition 4.")
1645 1d833b97 Florent Chuffart
			}
1646 5bfac5a3 Florent Chuffart
		}
1647 1d833b97 Florent Chuffart
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1648 1d833b97 Florent Chuffart
			print(tmptransfostart)
1649 5bfac5a3 Florent Chuffart
			print(tmptransfostop)
1650 1d833b97 Florent Chuffart
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1651 1d833b97 Florent Chuffart
		}
1652 55c1cdff Florent Chuffart
	}
1653 5bfac5a3 Florent Chuffart
  
1654 1d833b97 Florent Chuffart
  # Deal with strand
1655 1d833b97 Florent Chuffart
  if (tmptransfostart$V8 == 1) {
1656 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1657 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1658 1d833b97 Florent Chuffart
  } else {
1659 1d833b97 Florent Chuffart
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1660 1d833b97 Florent Chuffart
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1661 1d833b97 Florent Chuffart
  }
1662 1d833b97 Florent Chuffart
	# Build returned roi
1663 1d833b97 Florent Chuffart
	roi$strain_ref = strain2
1664 1d833b97 Florent Chuffart
	if (roi$strain_ref == "BY") {
1665 1d833b97 Florent Chuffart
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1666 1d833b97 Florent Chuffart
	} else {
1667 1d833b97 Florent Chuffart
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1668 1d833b97 Florent Chuffart
	}
1669 1d833b97 Florent Chuffart
  roi$begin = begin_2
1670 1d833b97 Florent Chuffart
  roi$end = end_2
1671 1d833b97 Florent Chuffart
	if (sign(roi$end - roi$begin) == 0) {
1672 5bfac5a3 Florent Chuffart
		roi$length = 1
1673 5bfac5a3 Florent Chuffart
	} else {
1674 5bfac5a3 Florent Chuffart
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1675 1d833b97 Florent Chuffart
	}
1676 55c1cdff Florent Chuffart
  return(roi)
1677 1d833b97 Florent Chuffart
}, ex=function(){
1678 4016229d Florent Chuffart
	# Define new translate_cur function...
1679 4016229d Florent Chuffart
	translate_cur = function(roi, strain2, config) {
1680 1d833b97 Florent Chuffart
		strain1 = roi$strain_ref
1681 1d833b97 Florent Chuffart
		if (strain1 == strain2) {
1682 1d833b97 Florent Chuffart
			return(roi)
1683 1d833b97 Florent Chuffart
		} else {
1684 4016229d Florent Chuffart
		  stop("Here is my new translate_cur function...")
1685 5bfac5a3 Florent Chuffart
		}
1686 1d833b97 Florent Chuffart
	}
1687 1d833b97 Florent Chuffart
	# Binding it by uncomment follwing lines.
1688 4016229d Florent Chuffart
	# unlockBinding("translate_cur", as.environment("package:nm"))
1689 4016229d Florent Chuffart
	# unlockBinding("translate_cur", getNamespace("nm"))
1690 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, "package:nm")
1691 4016229d Florent Chuffart
	# assign("translate_cur", translate_cur, getNamespace("nm"))
1692 4016229d Florent Chuffart
	# lockBinding("translate_cur", getNamespace("nm"))
1693 4016229d Florent Chuffart
	# lockBinding("translate_cur", as.environment("package:nm"))
1694 1d833b97 Florent Chuffart
})
1695 1d833b97 Florent Chuffart
1696 1d833b97 Florent Chuffart
1697 780f632a Florent Chuffart
compute_curs = function (# Compute Common Uninterrupted Regions (CUR)
1698 5bfac5a3 Florent Chuffart
### CURs are regions that can be aligned between the genomes
1699 5bfac5a3 Florent Chuffart
diff_allowed = 30, ##<< the maximum indel width allowe din a CUR
1700 5bfac5a3 Florent Chuffart
min_cur_width = 4000, ##<< The minimum width of a CUR
1701 b26ac9e7 Florent Chuffart
combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM")), ##<< list of strain than will be tested as uninterrupted regions
1702 5bfac5a3 Florent Chuffart
config = NULL ##<< GLOBAL config variable
1703 5bfac5a3 Florent Chuffart
) {
1704 780f632a Florent Chuffart
  dir.create(config$RESULTS_DIR, recursive=TRUE, showWarnings=FALSE)
1705 5bfac5a3 Florent Chuffart
  check_overlaping = function(strain1, strain2, chr, lower_bound, upper_bound, config=NULL) {
1706 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1707 5bfac5a3 Florent Chuffart
    check_homogeneity(c2c)
1708 5bfac5a3 Florent Chuffart
  	if (length(c2c[,1]) == 0 ) {
1709 5bfac5a3 Florent Chuffart
      stop("WARNING! checking overlapping for a region corresponding to an empty c2c.")
1710 5bfac5a3 Florent Chuffart
    } else {
1711 5bfac5a3 Florent Chuffart
  		lower_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; min(l$V2, l$V3)})
1712 5bfac5a3 Florent Chuffart
  		upper_bounds = apply(t(1:nrow(c2c)), 2,function(i){l = c2c[i,]; max(l$V2, l$V3)})
1713 5bfac5a3 Florent Chuffart
  		tmp_index = order(lower_bounds)
1714 5bfac5a3 Florent Chuffart
      lower_bounds = lower_bounds[tmp_index]
1715 5bfac5a3 Florent Chuffart
      upper_bounds = upper_bounds[tmp_index]
1716 5bfac5a3 Florent Chuffart
      tmp_diff = lower_bounds[-1] - upper_bounds[-length(upper_bounds)]
1717 5bfac5a3 Florent Chuffart
      ov_index = which(tmp_diff < 0)
1718 5bfac5a3 Florent Chuffart
      if(length(ov_index < 0) !=0 ) {
1719 5bfac5a3 Florent Chuffart
        ov_index = ov_index[1]
1720 5bfac5a3 Florent Chuffart
        print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ", c2c[1,]$V1, sep=""))
1721 5bfac5a3 Florent Chuffart
        c2c_corrupted = c2c[tmp_index,][c(ov_index, ov_index + 1),]
1722 5bfac5a3 Florent Chuffart
        print(c2c_corrupted)
1723 5bfac5a3 Florent Chuffart
        return(list(lower_bounds[ov_index+1] - 1, upper_bounds[ov_index] + 1))
1724 5bfac5a3 Florent Chuffart
      }
1725 5bfac5a3 Florent Chuffart
      return(NULL)
1726 5bfac5a3 Florent Chuffart
    }
1727 5bfac5a3 Florent Chuffart
  }
1728 1d833b97 Florent Chuffart
1729 5bfac5a3 Florent Chuffart
  check_homogeneity = function(sub_c2c) {
1730 5bfac5a3 Florent Chuffart
    tmp_signs = sign(sub_c2c$V2 - sub_c2c$V3)
1731 5bfac5a3 Florent Chuffart
    tmp_signs = tmp_signs[tmp_signs != 0]
1732 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs[1]  != tmp_signs)) {
1733 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign)! ********************"))
1734 5bfac5a3 Florent Chuffart
      print(tmp_signs)
1735 5bfac5a3 Florent Chuffart
  	}
1736 5bfac5a3 Florent Chuffart
    tmp_signs2 = sign(sub_c2c$V9 - sub_c2c$V10)
1737 5bfac5a3 Florent Chuffart
    tmp_signs2 = tmp_signs2[tmp_signs2 != 0]
1738 5bfac5a3 Florent Chuffart
  	if (sum(tmp_signs2[1]  != tmp_signs2)) {
1739 5bfac5a3 Florent Chuffart
  		print(paste("*************** ERROR, non homogenous region (sign2)! ********************"))
1740 5bfac5a3 Florent Chuffart
      print(tmp_signs2)
1741 5bfac5a3 Florent Chuffart
  	}
1742 5bfac5a3 Florent Chuffart
  	if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1743 5bfac5a3 Florent Chuffart
  		print("*************** ERROR, non homogenous region chrs or V8! ********************")
1744 5bfac5a3 Florent Chuffart
  	}
1745 5bfac5a3 Florent Chuffart
  }
1746 1d833b97 Florent Chuffart
1747 5bfac5a3 Florent Chuffart
  test_and_squeeze_rois = function(foo, config=NULL) {
1748 5bfac5a3 Florent Chuffart
    is_it_ok = function(list1, list2) {
1749 5bfac5a3 Florent Chuffart
      bar = cbind(list1$begin, list2$begin, abs(list1$begin - list2$begin), list1$end, list2$end, abs(list1$end - list2$end), list1$length, list2$length, abs(list1$length - list2$length))
1750 5bfac5a3 Florent Chuffart
      ok = length(bar[bar[,3] != 0 | bar[,6] != 0, ]) == 0
1751 5bfac5a3 Florent Chuffart
      if (!ok) {
1752 5bfac5a3 Florent Chuffart
        print(bar[bar[,3] != 0 | bar[,6] != 0, ])
1753 5bfac5a3 Florent Chuffart
      }
1754 5bfac5a3 Florent Chuffart
      return (ok)
1755 5bfac5a3 Florent Chuffart
    }
1756 5bfac5a3 Florent Chuffart
    squeeze_rois = function(list1, list2) {
1757 5bfac5a3 Florent Chuffart
      rois = apply(t(1:nrow(list1)), 2, function(i){
1758 5bfac5a3 Florent Chuffart
        roi = list1[i,]
1759 5bfac5a3 Florent Chuffart
        roi2 = list2[i,]
1760 5bfac5a3 Florent Chuffart
        roi$begin = max(roi$begin, roi2$begin)
1761 5bfac5a3 Florent Chuffart
        roi$end = min(roi$end, roi2$end)
1762 5bfac5a3 Florent Chuffart
        roi$length =  roi$end - roi$begin + 1
1763 5bfac5a3 Florent Chuffart
        return(roi)
1764 5bfac5a3 Florent Chuffart
      })
1765 5bfac5a3 Florent Chuffart
      return(do.call(rbind, rois))
1766 5bfac5a3 Florent Chuffart
    }
1767 780f632a Florent Chuffart
    # foo_orig = compute_curs2(config=config)
1768 5bfac5a3 Florent Chuffart
    # foo = foo_orig
1769 5bfac5a3 Florent Chuffart
    STOP = FALSE
1770 5bfac5a3 Florent Chuffart
    nb_round = 0
1771 5bfac5a3 Florent Chuffart
    while(!STOP) {
1772 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1773 5bfac5a3 Florent Chuffart
      print(paste("2-2 round #", nb_round, sep=""))
1774 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1775 4016229d Florent Chuffart
      fooyjm = translate_curs(foo, "YJM", config=config)
1776 4016229d Florent Chuffart
      fooyjmby = translate_curs(fooyjm, "BY", config=config)
1777 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, fooyjmby)) {
1778 5bfac5a3 Florent Chuffart
        print("case 1")
1779 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, fooyjmby)
1780 5bfac5a3 Florent Chuffart
    		next
1781 5bfac5a3 Florent Chuffart
      }
1782 4016229d Florent Chuffart
      foorm = translate_curs(foo, "RM", config=config)
1783 4016229d Florent Chuffart
      foormby = translate_curs(foorm, "BY", config=config)
1784 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foormby)) {
1785 5bfac5a3 Florent Chuffart
        print("case 2")
1786 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foormby)
1787 5bfac5a3 Florent Chuffart
    		next
1788 5bfac5a3 Florent Chuffart
      }
1789 4016229d Florent Chuffart
      fooyjmrm = translate_curs(fooyjm, "RM", config=config)
1790 4016229d Florent Chuffart
      fooyjmrmyjm = translate_curs(fooyjmrm, "YJM", config=config)
1791 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooyjm, fooyjmrmyjm)) {
1792 5bfac5a3 Florent Chuffart
        print("case 3")
1793 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooyjm, fooyjmrmyjm)
1794 5bfac5a3 Florent Chuffart
        next
1795 5bfac5a3 Florent Chuffart
      }
1796 4016229d Florent Chuffart
      foormyjm = translate_curs(foorm, "YJM", config=config)
1797 4016229d Florent Chuffart
      foormyjmrm = translate_curs(foormyjm, "RM", config=config)
1798 5bfac5a3 Florent Chuffart
      if (!is_it_ok(foorm, foormyjmrm)) {
1799 5bfac5a3 Florent Chuffart
        print("case 4")
1800 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(foorm, foormyjmrm)
1801 5bfac5a3 Florent Chuffart
        next
1802 5bfac5a3 Florent Chuffart
      }
1803 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1804 5bfac5a3 Florent Chuffart
      STOP = TRUE
1805 5bfac5a3 Florent Chuffart
    }
1806 5bfac5a3 Florent Chuffart
    STOP = FALSE
1807 5bfac5a3 Florent Chuffart
    nb_round = 0
1808 5bfac5a3 Florent Chuffart
    while(!STOP) {
1809 5bfac5a3 Florent Chuffart
      nb_round = nb_round + 1
1810 5bfac5a3 Florent Chuffart
      print(paste("3-3 round #", nb_round, sep=""))
1811 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1812 4016229d Florent Chuffart
      foobyrm = translate_curs(fooby, "RM", config=config)
1813 4016229d Florent Chuffart
      foobyrmyjm = translate_curs(foobyrm, "YJM", config=config)
1814 4016229d Florent Chuffart
      foobyrmyjmby = translate_curs(foobyrmyjm, "BY", config=config)
1815 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyrmyjmby)) {
1816 5bfac5a3 Florent Chuffart
        print("case 1")
1817 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyrmyjmby)
1818 5bfac5a3 Florent Chuffart
      }
1819 4016229d Florent Chuffart
      fooby = translate_curs(foo, "BY", config=config)
1820 4016229d Florent Chuffart
      foobyyjm = translate_curs(fooby, "YJM", config=config)
1821 4016229d Florent Chuffart
      foobyyjmrm = translate_curs(foobyyjm, "RM", config=config)
1822 4016229d Florent Chuffart
      foobyyjmrmby = translate_curs(foobyyjmrm, "BY", config=config)
1823 5bfac5a3 Florent Chuffart
      if (!is_it_ok(fooby, foobyyjmrmby)) {
1824 5bfac5a3 Florent Chuffart
        print("case 2")
1825 5bfac5a3 Florent Chuffart
        foo = squeeze_rois(fooby, foobyyjmrmby)
1826 5bfac5a3 Florent Chuffart
        next
1827 5bfac5a3 Florent Chuffart
      }
1828 4016229d Florent Chuffart
      foo = translate_curs(foo, "BY", config=config)
1829 5bfac5a3 Florent Chuffart
      STOP = TRUE
1830 5bfac5a3 Florent Chuffart
    }
1831 5bfac5a3 Florent Chuffart
    print("end")
1832 5bfac5a3 Florent Chuffart
    return(foo)
1833 5bfac5a3 Florent Chuffart
  }
1834 1d833b97 Florent Chuffart
1835 5bfac5a3 Florent Chuffart
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 30, min_cur_width = 200, config=NULL) {
1836 5bfac5a3 Florent Chuffart
    c2c = c2c_extraction(strain1, strain2, config=config)
1837 5bfac5a3 Florent Chuffart
    # computing diffs
1838 55c1cdff Florent Chuffart
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
1839 55c1cdff Florent Chuffart
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
1840 1d833b97 Florent Chuffart
    # Filtering
1841 1d833b97 Florent Chuffart
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1842 1d833b97 Florent Chuffart
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1843 5bfac5a3 Florent Chuffart
    rois = apply(t(1:length(indexes_start)), 2, function(i) {
1844 5bfac5a3 Florent Chuffart
      if ( i %% 20 == 1) print(paste(i, "/", length(indexes_start)))
1845 5bfac5a3 Florent Chuffart
      returned_rois = NULL
1846 1d833b97 Florent Chuffart
  		start = indexes_start[i]
1847 1d833b97 Florent Chuffart
  		stop = indexes_stop[i]
1848 1d833b97 Florent Chuffart
  		sub_c2c = c2c[start:stop,]
1849 5bfac5a3 Florent Chuffart
      check_homogeneity(sub_c2c)
1850 1d833b97 Florent Chuffart
  		if (strain1 == "BY") {
1851 1d833b97 Florent Chuffart
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1852 5bfac5a3 Florent Chuffart
  		} else {
1853 1d833b97 Florent Chuffart
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1854 1d833b97 Florent Chuffart
  		}
1855 5bfac5a3 Florent Chuffart
  		roi = list(chr=chr, begin=min(c(sub_c2c$V2,sub_c2c$V3)), end=max(c(sub_c2c$V2,sub_c2c$V3)), strain_ref=strain1)
1856 5bfac5a3 Florent Chuffart
  		roi$length = roi$end - roi$begin + 1
1857 1d833b97 Florent Chuffart
  		if (roi$length >= min_cur_width) {
1858 5bfac5a3 Florent Chuffart
  			lower_bound = roi$begin
1859 5bfac5a3 Florent Chuffart
  			upper_bound = roi$end
1860 5bfac5a3 Florent Chuffart
        check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1861 5bfac5a3 Florent Chuffart
        while(!is.null(check)) {
1862 5bfac5a3 Florent Chuffart
          # print(check)
1863 5bfac5a3 Florent Chuffart
      		roi1 = roi
1864 5bfac5a3 Florent Chuffart
          roi1$end = check[[1]]
1865 5bfac5a3 Florent Chuffart
      		roi1$length = roi1$end - roi1$begin + 1
1866 5bfac5a3 Florent Chuffart
      		if (roi1$length >= min_cur_width) {
1867 5bfac5a3 Florent Chuffart
            returned_rois = dfadd(returned_rois,roi1)
1868 5bfac5a3 Florent Chuffart
          }
1869 5bfac5a3 Florent Chuffart
          roi$begin = check[[2]]
1870 5bfac5a3 Florent Chuffart
      		roi$length = roi$end - roi$begin + 1
1871 5bfac5a3 Florent Chuffart
      		if (roi$length >= min_cur_width) {
1872 5bfac5a3 Florent Chuffart
      			lower_bound = min(roi$begin, roi$end)
1873 5bfac5a3 Florent Chuffart
      			upper_bound = max(roi$begin, roi$end)
1874 5bfac5a3 Florent Chuffart
            check = check_overlaping(strain1, strain2, chr, lower_bound, upper_bound, config=config)
1875 5bfac5a3 Florent Chuffart
          } else {
1876 5bfac5a3 Florent Chuffart
            check = NULL
1877 5bfac5a3 Florent Chuffart
            roi = NULL
1878 5bfac5a3 Florent Chuffart
          }
1879 5bfac5a3 Florent Chuffart
        }
1880 5bfac5a3 Florent Chuffart
        returned_rois = dfadd(returned_rois,roi)
1881 1d833b97 Florent Chuffart
  	  }
1882 5bfac5a3 Florent Chuffart
    })
1883 5bfac5a3 Florent Chuffart
    rois = do.call(rbind,rois)
1884 5bfac5a3 Florent Chuffart
    rois = rois[order(as.numeric(rois$chr), rois$begin), ]
1885 1d833b97 Florent Chuffart
  	return(rois)
1886 1d833b97 Florent Chuffart
  }
1887 1d833b97 Florent Chuffart
1888 4016229d Florent Chuffart
  translate_curs = function(rois, target_strain, config) {
1889 5bfac5a3 Florent Chuffart
    tr_rois = apply(t(1:nrow(rois)), 2, function(i){
1890 5bfac5a3 Florent Chuffart
      roi = rois[i,]
1891 4016229d Florent Chuffart
      tr_roi = translate_cur(roi, target_strain, config=config)  
1892 5bfac5a3 Florent Chuffart
      tmp_begin = min(tr_roi$begin, tr_roi$end)
1893 5bfac5a3 Florent Chuffart
      tmp_end = max(tr_roi$begin, tr_roi$end)
1894 5bfac5a3 Florent Chuffart
      tr_roi$begin = tmp_begin
1895 5bfac5a3 Florent Chuffart
      tr_roi$end = tmp_end
1896 5bfac5a3 Florent Chuffart
      tr_roi$length =  tr_roi$end - tr_roi$begin + 1
1897 5bfac5a3 Florent Chuffart
      return(tr_roi)
1898 5bfac5a3 Florent Chuffart
    })
1899 5bfac5a3 Florent Chuffart
    tr_rois = do.call(rbind, tr_rois)
1900 5bfac5a3 Florent Chuffart
    return(tr_rois)
1901 5bfac5a3 Florent Chuffart
  }
1902 729c934e Florent Chuffart
1903 5bfac5a3 Florent Chuffart
  rois = list()
1904 5bfac5a3 Florent Chuffart
  for (combi in combis) {
1905 5bfac5a3 Florent Chuffart
    strain1 = combi[1]
1906 5bfac5a3 Florent Chuffart
    strain2 = combi[2]
1907 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1908 5bfac5a3 Florent Chuffart
    rois_fwd = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1909 5bfac5a3 Florent Chuffart
    strain1 = combi[2]
1910 5bfac5a3 Florent Chuffart
    strain2 = combi[1]
1911 5bfac5a3 Florent Chuffart
    print(paste(strain1, strain2))
1912 5bfac5a3 Florent Chuffart
    rois_rev = get_inter_strain_rois(strain1, strain2, min_cur_width = min_cur_width, diff_allowed = diff_allowed, config=config)
1913 4016229d Florent Chuffart
    tr_rois_rev = translate_curs(rois_rev, combi[1], config)      
1914 5bfac5a3 Florent Chuffart
    region1 = rois_fwd
1915 5bfac5a3 Florent Chuffart
    region2 = tr_rois_rev
1916 5bfac5a3 Florent Chuffart
    rois[[paste(combi[1], combi[2], sep="_")]] = intersect_region(rois_fwd, tr_rois_rev)
1917 5bfac5a3 Florent Chuffart
  }
1918 b26ac9e7 Florent Chuffart
1919 b26ac9e7 Florent Chuffart
  if (length(combis)==1) {
1920 780f632a Florent Chuffart
    curs = rois[[1]]
1921 b26ac9e7 Florent Chuffart
  } else {
1922 b26ac9e7 Florent Chuffart
    reducted_1_rois = intersect_region(rois[["BY_RM"]], rois[["BY_YJM"]])
1923 b26ac9e7 Florent Chuffart
    reducted_1_rois = reducted_1_rois[reducted_1_rois$length >= min_cur_width, ]
1924 b26ac9e7 Florent Chuffart
    tr_reducted_1_rois = translate_curs(reducted_1_rois, "RM", config)  
1925 b26ac9e7 Florent Chuffart
    reducted_2_rois = intersect_region(tr_reducted_1_rois, rois[["RM_YJM"]])
1926 b26ac9e7 Florent Chuffart
    reducted_2_rois = reducted_2_rois[reducted_2_rois$length >= min_cur_width, ]
1927 b26ac9e7 Florent Chuffart
    reducted_rois = translate_curs(reducted_2_rois, "BY", config)  
1928 b26ac9e7 Florent Chuffart
    reducted_rois = reducted_rois[order(as.numeric(reducted_rois$chr), reducted_rois$begin), ]
1929 b26ac9e7 Florent Chuffart
    squeezed_rois = test_and_squeeze_rois(reducted_rois, config=config)
1930 780f632a Florent Chuffart
    curs = squeezed_rois
1931 b26ac9e7 Florent Chuffart
  }
1932 780f632a Florent Chuffart
    
1933 780f632a Florent Chuffart
  cur_filename = paste(config$RESULTS_DIR, "/all_curs.tab", sep="")
1934 780f632a Florent Chuffart
  write.table(curs, file=cur_filename, row.names=FALSE, quote=FALSE)  
1935 780f632a Florent Chuffart
  return(curs)
1936 1d833b97 Florent Chuffart
}
1937 1d833b97 Florent Chuffart
1938 5bfac5a3 Florent Chuffart
intersect_region = function(# Returns the intersection of 2 list on regions.
1939 5bfac5a3 Florent Chuffart
### This function...
1940 5bfac5a3 Florent Chuffart
region1, ##<< Original regions.
1941 5bfac5a3 Florent Chuffart
region2 ##<< Regions to intersect.
1942 5bfac5a3 Florent Chuffart
) {
1943 5bfac5a3 Florent Chuffart
  intersection = apply(t(1:nrow(region1)), 2, function(i) {
1944 5bfac5a3 Florent Chuffart
    roi1 = region1[i, ]
1945 5bfac5a3 Florent Chuffart
    sub_regions2 = region2[region2$chr == roi1$chr, ]
1946 5bfac5a3 Florent Chuffart
    sub_regions2 = sub_regions2[roi1$begin <= sub_regions2$begin & sub_regions2$begin <= roi1$end |
1947 5bfac5a3 Florent Chuffart
                                roi1$begin <= sub_regions2$end & sub_regions2$end <= roi1$end |
1948 5bfac5a3 Florent Chuffart
                                sub_regions2$begin < roi1$begin  & roi1$end < sub_regions2$end
1949 5bfac5a3 Florent Chuffart
                                , ]
1950 5bfac5a3 Florent Chuffart
    if (nrow(sub_regions2) == 0) {
1951 5bfac5a3 Florent Chuffart
      print("removing a region")
1952 5bfac5a3 Florent Chuffart
      return(NULL)
1953 5bfac5a3 Florent Chuffart
    } else if (nrow(sub_regions2) > 1) {
1954 5bfac5a3 Florent Chuffart
      print("more than one region in intersect_region")          
1955 5bfac5a3 Florent Chuffart
      return(do.call(rbind, apply(t(1:nrow(sub_regions2)), 2, function(i) {intersect_region(roi1, sub_regions2[i,])})))
1956 5bfac5a3 Florent Chuffart
    } else {
1957 5bfac5a3 Florent Chuffart
      roi2 = sub_regions2[1,]
1958 5bfac5a3 Florent Chuffart
      if (roi1$begin < roi2$begin) {
1959 5bfac5a3 Florent Chuffart
        print("not the same begin")
1960 5bfac5a3 Florent Chuffart
        roi1$begin = roi2$begin
1961 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1962 5bfac5a3 Florent Chuffart
      }
1963 5bfac5a3 Florent Chuffart
      if (roi1$end > roi2$end) {
1964 5bfac5a3 Florent Chuffart
        print("not the same end")
1965 5bfac5a3 Florent Chuffart
        roi1$end = roi2$end
1966 5bfac5a3 Florent Chuffart
        roi1$length =  roi1$end - roi1$begin + 1
1967 5bfac5a3 Florent Chuffart
      }
1968 5bfac5a3 Florent Chuffart
      return(roi1)
1969 5bfac5a3 Florent Chuffart
    }         
1970 5bfac5a3 Florent Chuffart
  })
1971 5bfac5a3 Florent Chuffart
  return(do.call(rbind,intersection))
1972 5bfac5a3 Florent Chuffart
}
1973 1d833b97 Florent Chuffart
1974 55c1cdff Florent Chuffart
build_replicates = structure(function(# Stage replicates data
1975 e5603c3f Florent Chuffart
### This function loads in memory the data corresponding to the given experiments.
1976 e5603c3f Florent Chuffart
expe, ##<< a list of vectors corresponding to replicates.
1977 61914235 Florent Chuffart
roi, ##<< the region that we are interested in.
1978 61914235 Florent Chuffart
only_fetch=FALSE, ##<< filter or not inputs.
1979 61914235 Florent Chuffart
get_genome=FALSE,##<< Load or not corresponding genome.
1980 61914235 Florent Chuffart
all_samples, ##<< Global list of samples.
1981 61914235 Florent Chuffart
config=NULL ##<< GLOBAL config variable.
1982 61914235 Florent Chuffart
) {
1983 61914235 Florent Chuffart
  build_samples = function(samples_ids, roi, only_fetch=FALSE, get_genome=TRUE, get_ouputs=TRUE, all_samples) {
1984 61914235 Florent Chuffart
  	samples=list()
1985 61914235 Florent Chuffart
  	for (i in samples_ids) {
1986 61914235 Florent Chuffart
  		sample = as.list(all_samples[all_samples$id==i,])
1987 a0b91fee Florent Chuffart
      sample$orig_roi = roi
1988 4016229d Florent Chuffart
      sample$roi = translate_cur(roi, sample$strain, config = config)
1989 61914235 Florent Chuffart
  		if (get_genome) {
1990 61914235 Florent Chuffart
  			# Get Genome
1991 61914235 Florent Chuffart
  			fasta_ref_filename = config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]]
1992 b26ac9e7 Florent Chuffart
  			sample$roi$genome = mread.fasta(fasta_ref_filename, )[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
1993 61914235 Florent Chuffart
  		}
1994 61914235 Florent Chuffart
  		# Get inputs
1995 780f632a Florent Chuffart
      if ("tf_input" %in% names(all_samples)) {
1996 780f632a Florent Chuffart
        sample_inputs_filename = all_samples$tf_input[all_samples$id==i]
1997 780f632a Florent Chuffart
      } else {
1998 780f632a Florent Chuffart
        sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
1999 780f632a Florent Chuffart
      }
2000 61914235 Florent Chuffart
  		sample_inputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep="")
2001 b26ac9e7 Florent Chuffart
  		sample$inputs = mread.table(sample_inputs_filename, stringsAsFactors=FALSE)
2002 5bfac5a3 Florent Chuffart
  		sample$total_reads = sum(sample$inputs[,4])
2003 61914235 Florent Chuffart
  		if (!only_fetch) {
2004 b8a95426 Florent Chuffart
  		  sample$inputs = filter_tf_inputs(sample$inputs, sample$roi$chr, min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300, filter_for_coverage=TRUE)
2005 61914235 Florent Chuffart
  	  }
2006 61914235 Florent Chuffart
  	  # Get TF outputs for Mnase_Seq samples
2007 5bfac5a3 Florent Chuffart
  		if (sample$marker == "Mnase_Seq" & get_ouputs) {
2008 780f632a Florent Chuffart
        if ("tf_output" %in% names(all_samples)) {
2009 780f632a Florent Chuffart
          sample_outputs_filename = all_samples$tf_output[all_samples$id==i]
2010 780f632a Florent Chuffart
        } else {
2011 780f632a Florent Chuffart
          sample_outputs_filename = paste(config$ALIGN_DIR, "/TF/sample_", i, "_all_nucs.tab", sep="")                
2012 780f632a Florent Chuffart
        }
2013 b26ac9e7 Florent Chuffart
  			sample$outputs = mread.table(sample_outputs_filename, header=TRUE, sep="\t")
2014 61914235 Florent Chuffart
  			if (!only_fetch) {
2015 61914235 Florent Chuffart
  	  		sample$outputs = filter_tf_outputs(sample$outputs, sample$roi$chr,  min(sample$roi$begin, sample$roi$end), max(sample$roi$begin, sample$roi$end), 300)
2016 61914235 Florent Chuffart
    		}
2017 61914235 Florent Chuffart
  		}
2018 5bfac5a3 Florent Chuffart
  		samples[[length(samples) + 1]] = sample
2019 61914235 Florent Chuffart
  	}
2020 61914235 Florent Chuffart
  	return(samples)
2021 61914235 Florent Chuffart
  }
2022 61914235 Florent Chuffart
	replicates = list()
2023 61914235 Florent Chuffart
	for(samples_ids in expe) {
2024 61914235 Florent Chuffart
		samples = build_samples(samples_ids, roi, only_fetch=only_fetch, get_genome=get_genome, all_samples=all_samples)
2025 61914235 Florent Chuffart
		replicates[[length(replicates) + 1]] = samples
2026 61914235 Florent Chuffart
	}
2027 61914235 Florent Chuffart
	return(replicates)
2028 61914235 Florent Chuffart
  }, ex = function() {
2029 61914235 Florent Chuffart
    # library(rjson)
2030 61914235 Florent Chuffart
    # library(nucleominer)
2031 55c1cdff Florent Chuffart
    #
2032 61914235 Florent Chuffart
    # # Read config file
2033 e5603c3f Florent Chuffart
    # json_conf_file = "nucleominer_config.json"
2034 61914235 Florent Chuffart
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
2035 61914235 Florent Chuffart
    # # Read sample file
2036 b26ac9e7 Florent Chuffart
    # all_samples = read.cvs(config$CSV_SAMPLE_FILE, sep=";", header=TRUE, stringsAsFactors=FALSE)
2037 61914235 Florent Chuffart
    # # here are the sample ids in a list
2038 61914235 Florent Chuffart
    # expes = list(c(1))
2039 61914235 Florent Chuffart
    # # here is the region that we wnt to see the coverage
2040 55c1cdff Florent Chuffart
    # cur = list(chr="8", begin=472000, end=474000, strain_ref="BY")
2041 61914235 Florent Chuffart
    # # it displays the corverage
2042 61914235 Florent Chuffart
    # replicates = build_replicates(expes, cur, all_samples=all_samples, config=config)
2043 55c1cdff Florent Chuffart
    # out = watch_samples(replicates, config$READ_LENGTH,
2044 55c1cdff Florent Chuffart
    #       plot_coverage = TRUE,
2045 55c1cdff Florent Chuffart
    #       plot_squared_reads = FALSE,
2046 55c1cdff Florent Chuffart
    #       plot_ref_genome = FALSE,
2047 55c1cdff Florent Chuffart
    #       plot_arrow_raw_reads = FALSE,
2048 55c1cdff Florent Chuffart
    #       plot_arrow_nuc_reads = FALSE,
2049 55c1cdff Florent Chuffart
    #       plot_gaussian_reads = FALSE,
2050 55c1cdff Florent Chuffart
    #       plot_gaussian_unified_reads = FALSE,
2051 55c1cdff Florent Chuffart
    #       plot_ellipse_nucs = FALSE,
2052 55c1cdff Florent Chuffart
    #       plot_wp_nucs = FALSE,
2053 55c1cdff Florent Chuffart
    #       plot_wp_nuc_model = FALSE,
2054 55c1cdff Florent Chuffart
    #       plot_common_nucs = FALSE,
2055 61914235 Florent Chuffart
    #       height = 50)
2056 61914235 Florent Chuffart
  })
2057 61914235 Florent Chuffart
2058 61914235 Florent Chuffart
2059 61914235 Florent Chuffart
2060 61914235 Florent Chuffart
2061 61914235 Florent Chuffart
2062 61914235 Florent Chuffart
2063 61914235 Florent Chuffart
2064 61914235 Florent Chuffart
2065 61914235 Florent Chuffart
2066 61914235 Florent Chuffart
2067 61914235 Florent Chuffart
2068 61914235 Florent Chuffart
2069 61914235 Florent Chuffart
2070 61914235 Florent Chuffart
2071 61914235 Florent Chuffart
2072 61914235 Florent Chuffart
2073 61914235 Florent Chuffart
2074 61914235 Florent Chuffart
2075 61914235 Florent Chuffart
2076 61914235 Florent Chuffart
2077 61914235 Florent Chuffart
2078 61914235 Florent Chuffart
2079 61914235 Florent Chuffart
2080 61914235 Florent Chuffart
2081 61914235 Florent Chuffart
2082 61914235 Florent Chuffart
2083 61914235 Florent Chuffart
2084 61914235 Florent Chuffart
2085 61914235 Florent Chuffart
2086 61914235 Florent Chuffart
2087 61914235 Florent Chuffart
2088 61914235 Florent Chuffart
2089 61914235 Florent Chuffart
2090 1d833b97 Florent Chuffart
2091 1d833b97 Florent Chuffart
2092 1d833b97 Florent Chuffart
2093 1d833b97 Florent Chuffart
2094 1d833b97 Florent Chuffart
2095 1d833b97 Florent Chuffart
2096 1d833b97 Florent Chuffart
2097 1d833b97 Florent Chuffart
2098 1d833b97 Florent Chuffart
2099 1d833b97 Florent Chuffart
2100 1d833b97 Florent Chuffart
2101 1d833b97 Florent Chuffart
2102 1d833b97 Florent Chuffart
2103 1d833b97 Florent Chuffart
2104 1d833b97 Florent Chuffart
2105 1d833b97 Florent Chuffart
2106 1d833b97 Florent Chuffart
2107 1d833b97 Florent Chuffart
2108 1d833b97 Florent Chuffart
2109 1d833b97 Florent Chuffart
2110 1d833b97 Florent Chuffart
2111 1d833b97 Florent Chuffart
2112 1d833b97 Florent Chuffart
2113 1d833b97 Florent Chuffart
2114 1d833b97 Florent Chuffart
2115 1d833b97 Florent Chuffart
2116 1d833b97 Florent Chuffart
2117 1d833b97 Florent Chuffart
2118 1d833b97 Florent Chuffart
2119 1d833b97 Florent Chuffart
2120 1d833b97 Florent Chuffart
2121 1d833b97 Florent Chuffart
2122 1d833b97 Florent Chuffart
2123 1d833b97 Florent Chuffart
2124 6b5a528c Florent Chuffart
2125 6b5a528c Florent Chuffart
2126 6b5a528c Florent Chuffart
2127 6b5a528c Florent Chuffart
2128 6b5a528c Florent Chuffart
2129 6b5a528c Florent Chuffart
2130 6b5a528c Florent Chuffart
2131 6b5a528c Florent Chuffart
2132 6b5a528c Florent Chuffart
2133 6b5a528c Florent Chuffart
2134 6b5a528c Florent Chuffart
2135 6b5a528c Florent Chuffart
2136 6b5a528c Florent Chuffart
2137 6b5a528c Florent Chuffart
2138 6b5a528c Florent Chuffart
2139 6b5a528c Florent Chuffart
2140 55c1cdff Florent Chuffart
watch_samples = function(# Watching analysis of samples
2141 6b5a528c Florent Chuffart
### This function allows to view analysis for a particuler region of the genome.
2142 6b5a528c Florent Chuffart
replicates, ##<< replicates under the form...
2143 6b5a528c Florent Chuffart
read_length, ##<< length of the reads
2144 6b5a528c Florent Chuffart
plot_ref_genome = TRUE, ##<< Plot (or not) reference genome.
2145 6b5a528c Florent Chuffart
plot_arrow_raw_reads = TRUE,  ##<< Plot (or not) arrows for raw reads.
2146 6b5a528c Florent Chuffart
plot_arrow_nuc_reads = TRUE,  ##<< Plot (or not) arrows for reads aasiocied to a nucleosome.
2147 6b5a528c Florent Chuffart
plot_squared_reads = TRUE,  ##<< Plot (or not) reads in the square fashion.
2148 6b5a528c Florent Chuffart
plot_coverage = FALSE,  ##<< Plot (or not) reads in the covergae fashion. fashion.
2149 6b5a528c Florent Chuffart
plot_gaussian_reads = TRUE,  ##<< Plot (or not) gaussian model of a F anf R reads.
2150 6b5a528c Florent Chuffart
plot_gaussian_unified_reads = TRUE,  ##<< Plot (or not) gaussian model of a nuc.
2151 6b5a528c Florent Chuffart
plot_ellipse_nucs = TRUE,  ##<< Plot (or not) ellipse for a nuc.
2152 7646593d Florent Chuffart
change_col = TRUE, ##<< Change the color of each nucleosome.
2153 6b5a528c Florent Chuffart
plot_wp_nucs = TRUE,  ##<< Plot (or not) cluster of nucs
2154 3bfae55d Florent Chuffart
plot_fuzzy_nucs = FALSE,  ##<< Plot (or not) cluster of fuzzy
2155 6b5a528c Florent Chuffart
plot_wp_nuc_model = TRUE,  ##<< Plot (or not) gaussian model for a cluster of nucs
2156 9fdbfada Florent Chuffart
plot_common_nucs = FALSE,  ##<< Plot (or not) aligned reads.
2157 9fdbfada Florent Chuffart
plot_common_unrs = FALSE,  ##<< Plot (or not) unaligned nucleosomal refgions (UNRs).
2158 6b5a528c Florent Chuffart
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
2159 729c934e Florent Chuffart
plot_chain = FALSE,  ##<< Plot (or not) clusterised nuceosomes between mnase samples.
2160 6e0010bc Florent Chuffart
plot_sample_id = FALSE, ##<<  Plot (or not) the sample id for each sample.
2161 55c1cdff Florent Chuffart
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
2162 6b5a528c Florent Chuffart
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
2163 1d833b97 Florent Chuffart
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
2164 6e0010bc Florent Chuffart
main=NULL, ##<< main title of the produced plot
2165 6e0010bc Florent Chuffart
xlab=NULL, ##<< xlab of the produced plot
2166 6e0010bc Florent Chuffart
ylab="#reads (per million reads)", ##<< ylab of the produced plot
2167 1d833b97 Florent Chuffart
config=NULL ##<< GLOBAL config variable
2168 6b5a528c Florent Chuffart
){
2169 6b5a528c Florent Chuffart
  returned_list = list()
2170 6b5a528c Florent Chuffart
  # Computing global display parameters
2171 6b5a528c Florent Chuffart
  if (replicates[[1]][[1]]$roi[["begin"]] < replicates[[1]][[1]]$roi[["end"]]) {
2172 6b5a528c Florent Chuffart
	  x_min_glo = replicates[[1]][[1]]$roi[["begin"]]
2173 6b5a528c Florent Chuffart
	  x_max_glo = replicates[[1]][[1]]$roi[["end"]]
2174 6b5a528c Florent Chuffart
  } else {
2175 6b5a528c Florent Chuffart
	  x_min_glo = - replicates[[1]][[1]]$roi[["begin"]]
2176 6b5a528c Florent Chuffart
	  x_max_glo = - replicates[[1]][[1]]$roi[["end"]]
2177 6b5a528c Florent Chuffart
  }
2178 6b5a528c Florent Chuffart
	base_glo = 0
2179 6b5a528c Florent Chuffart
	nb_rank_glo = 0
2180 6b5a528c Florent Chuffart
  for (samples in replicates) {
2181 6b5a528c Florent Chuffart
  	nb_rank_glo = nb_rank_glo + length(samples)
2182 6b5a528c Florent Chuffart
  }
2183 6b5a528c Florent Chuffart
	ylim_glo = c(base_glo, base_glo + height * nb_rank_glo)
2184 6b5a528c Florent Chuffart
	y_min_glo = min(ylim_glo)
2185 6b5a528c Florent Chuffart
	y_max_glo = max(ylim_glo)
2186 6b5a528c Florent Chuffart
  delta_y_glo = y_max_glo - y_min_glo
2187 6b5a528c Florent Chuffart
  # Plot main frame
2188 6e0010bc Florent Chuffart
  if (is.null(xlab)) {
2189 6e0010bc Florent Chuffart
    xlab = paste("Ref strain:", replicates[[1]][[1]]$strain, "chr: ", replicates[[1]][[1]]$roi$chr)
2190 6e0010bc Florent Chuffart
  }
2191 6e0010bc Florent Chuffart
  plot(c(x_min_glo,x_max_glo), c(0,0), ylim=ylim_glo, col=0, yaxt="n", ylab=ylab, xlab=xlab, main=main )
2192 6b5a528c Florent Chuffart
  axis(2, at=0:(nb_rank_glo*2) * delta_y_glo / (nb_rank_glo*2), labels=c(rep(c(height/2,0),nb_rank_glo),height/2))
2193 6b5a528c Florent Chuffart
  # Go
2194 6b5a528c Florent Chuffart
	replicates_wp_nucs = list()
2195 9fdbfada Florent Chuffart
  wp_maps = list()
2196 9fdbfada Florent Chuffart
  fuzzy_maps = list()
2197 6b5a528c Florent Chuffart
  for (replicate_rank in 1:length(replicates)) {
2198 6b5a528c Florent Chuffart
		# Computing replicate parameters
2199 6b5a528c Florent Chuffart
		nb_rank = length(samples)
2200 6b5a528c Florent Chuffart
		base = (replicate_rank-1) * height * nb_rank
2201 6b5a528c Florent Chuffart
		ylim = c(base, base + height * nb_rank)
2202 6b5a528c Florent Chuffart
		y_min = min(ylim)
2203 6b5a528c Florent Chuffart
		y_max = max(ylim)
2204 6b5a528c Florent Chuffart
	  delta_y = y_max - y_min
2205 6b5a528c Florent Chuffart
		samples = replicates[[replicate_rank]]
2206 6b5a528c Florent Chuffart
		for (sample_rank in 1:length(samples)) {
2207 6b5a528c Florent Chuffart
			# computing sample parameters
2208 6b5a528c Florent Chuffart
			sample = samples[[sample_rank]]
2209 6b5a528c Florent Chuffart
			y_lev = y_min + (sample_rank - 0.5) * delta_y/nb_rank
2210 6e0010bc Florent Chuffart
      if (plot_sample_id) {
2211 6e0010bc Florent Chuffart
  			text(x_min_glo, y_lev + height/2 - delta_y_glo/100, labels=paste("(",sample$id,") ",sample$strain, " ", sample$marker, sep=""))
2212 6e0010bc Florent Chuffart
      }
2213 6b5a528c Florent Chuffart
2214 6b5a528c Florent Chuffart
		  if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2215 6b5a528c Florent Chuffart
			  x_min = sample$roi[["begin"]]
2216 6b5a528c Florent Chuffart
			  x_max = sample$roi[["end"]]
2217 6b5a528c Florent Chuffart
		  } else {
2218 6b5a528c Florent Chuffart
			  x_min = - sample$roi[["begin"]]
2219 6b5a528c Florent Chuffart
			  x_max = - sample$roi[["end"]]
2220 5bfac5a3 Florent Chuffart
		  }
2221 6b5a528c Florent Chuffart
			shift = x_min_glo - x_min
2222 6b5a528c Florent Chuffart
	    # Plot Genome seq
2223 6b5a528c Florent Chuffart
			if (plot_ref_genome) {
2224 6b5a528c Florent Chuffart
		  	text(1:length(sample$roi$genome) + x_min - 1 + shift, rep(y_lev - height/2, length(sample$roi$genome)), labels=toupper(sample$roi$genome), cex=dev.size()[1]*9/(x_max-x_min), family="Courier")
2225 6b5a528c Florent Chuffart
		  }
2226 6b5a528c Florent Chuffart
			# Plot reads
2227 6b5a528c Florent Chuffart
			reads = sample$inputs
2228 5bfac5a3 Florent Chuffart
			signs = sign_from_strand(reads[,3])
2229 6b5a528c Florent Chuffart
			if (plot_arrow_raw_reads) {
2230 55c1cdff Florent Chuffart
				arrows(sign(x_min) * reads[,2] + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (reads[,2] + signs * read_length) + shift, sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads + y_lev,
2231 5bfac5a3 Florent Chuffart
				col=1, length=0.15/nb_rank)
2232 6b5a528c Florent Chuffart
			}
2233 5bfac5a3 Florent Chuffart
	    if (plot_squared_reads) {
2234 6b5a528c Florent Chuffart
        # require(plotrix)
2235 6b5a528c Florent Chuffart
				rect(sign(x_min) * reads[,2] + shift, rep(y_lev,length(reads[,1])), sign(x_min) * (reads[,2] + signs * read_length) + shift,  y_lev + sign(x_min) * signs * reads[,4] * 1000000/sample$total_reads, col=adjustcolor(1, alpha.f = 0.1),border=0)
2236 6b5a528c Florent Chuffart
			}
2237 5bfac5a3 Florent Chuffart
	    if (plot_coverage) {
2238 a52f85d7 Florent Chuffart
        if (length(reads[,1]) != 0) {
2239 a52f85d7 Florent Chuffart
          step_h = sign(x_min) * signs * reads[,4]
2240 a52f85d7 Florent Chuffart
          step_b = sign(x_min) * reads[,2] + shift
2241 a52f85d7 Florent Chuffart
          step_e = sign(x_min) * (reads[,2] + signs * 150) + shift
2242 55c1cdff Florent Chuffart
          steps_x = min(step_b, step_e):max(step_b, step_e)
2243 a52f85d7 Florent Chuffart
          steps_y = rep(0, length(steps_x))
2244 55c1cdff Florent Chuffart
          for (i in 1:length(step_h)) {
2245 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==min(step_b[i], step_e[i]))] =  steps_y[which(steps_x==min(step_b[i], step_e[i]))] + abs(step_h[i])
2246 a52f85d7 Florent Chuffart
            steps_y[which(steps_x==max(step_b[i], step_e[i]))] =  steps_y[which(steps_x==max(step_b[i], step_e[i]))] - abs(step_h[i])
2247 a52f85d7 Florent Chuffart
          }
2248 a52f85d7 Florent Chuffart
          tmp_index = which(steps_y != 0)
2249 a52f85d7 Florent Chuffart
          steps_x = steps_x[tmp_index]
2250 a52f85d7 Florent Chuffart
          steps_y = steps_y[tmp_index]
2251 a52f85d7 Florent Chuffart
          tmp_current_level = 0
2252 55c1cdff Florent Chuffart
          for (i in 1:length(steps_y)) {
2253 a52f85d7 Florent Chuffart
            steps_y[i] = tmp_current_level + steps_y[i]
2254 a52f85d7 Florent Chuffart
            tmp_current_level = steps_y[i]
2255 a52f85d7 Florent Chuffart
          }
2256 a52f85d7 Florent Chuffart
          steps_y = c(0, steps_y)
2257 a52f85d7 Florent Chuffart
          steps_y = steps_y * 1000000/sample$total_reads
2258 a52f85d7 Florent Chuffart
        } else {
2259 a52f85d7 Florent Chuffart
          steps_y = c(0, 0, 0)
2260 1e310114 Florent Chuffart
          steps_x = c(x_min, x_max)
2261 6b5a528c Florent Chuffart
        }
2262 0d1475ac Florent Chuffart
        # print(steps_x)
2263 0d1475ac Florent Chuffart
        # print(steps_y)
2264 6b5a528c Florent Chuffart
        lines(stepfun(steps_x, steps_y + y_lev), pch="")
2265 6b5a528c Florent Chuffart
        abline(y_lev,0)
2266 6b5a528c Florent Chuffart
        returned_list[[paste("cov", sample$id, sep="_")]] = stepfun(steps_x, steps_y)
2267 6b5a528c Florent Chuffart
			}
2268 6b5a528c Florent Chuffart
			# Plot nucs
2269 6b5a528c Florent Chuffart
	    if (sample$marker == "Mnase_Seq" & (plot_squared_reads | plot_gaussian_reads | plot_gaussian_unified_reads | plot_arrow_nuc_reads)) {
2270 6b5a528c Florent Chuffart
				nucs = sample$outputs
2271 6b5a528c Florent Chuffart
				if (length(nucs$center) > 0) {
2272 6b5a528c Florent Chuffart
					col = 1
2273 6b5a528c Florent Chuffart
		      for (i in 1:length(nucs$center)) {
2274 7646593d Florent Chuffart
            if (change_col) {
2275 55c1cdff Florent Chuffart
  						col = col + 1
2276 6e0010bc Florent Chuffart
              } else {
2277 6e0010bc Florent Chuffart
                col = "blue"
2278 6e0010bc Florent Chuffart
              }
2279 6b5a528c Florent Chuffart
		        nuc = nucs[i,]
2280 6b5a528c Florent Chuffart
						involved_reads = filter_tf_inputs(reads, sample$roi$chr, nuc$lower_bound, nuc$upper_bound, nuc_width = nuc$width)
2281 6b5a528c Florent Chuffart
				  	involved_signs = apply(t(involved_reads[,3]), 2, function(strand) {	if (strand == "F") return(1) else return(-1)})
2282 55c1cdff Florent Chuffart
						total_involved_reads = sum(involved_reads[,4])
2283 6b5a528c Florent Chuffart
						if (plot_arrow_nuc_reads ) {
2284 55c1cdff Florent Chuffart
							arrows(sign(x_min) * involved_reads[,2] + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev, sign(x_min) * (involved_reads[,2] + involved_signs * read_length) + shift, sign(x_min) * involved_signs * involved_reads[,4] * 1000000/sample$total_reads + y_lev,
2285 5bfac5a3 Florent Chuffart
							col=col, length=0.15/nb_rank)
2286 6b5a528c Florent Chuffart
						}
2287 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads | plot_gaussian_unified_reads) {
2288 6b5a528c Florent Chuffart
  						flatted_reads = flat_reads(involved_reads, nuc$width)
2289 6b5a528c Florent Chuffart
	  					delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2290 6b5a528c Florent Chuffart
		  			}
2291 6b5a528c Florent Chuffart
	          if (plot_gaussian_reads ) {
2292 6b5a528c Florent Chuffart
							flatted_reads = flat_reads(involved_reads, nuc$width)
2293 6b5a528c Florent Chuffart
							delta_x = (nuc$center - nuc$width):(nuc$center + nuc$width)
2294 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[1]]), sd(flatted_reads[[1]])) * length(flatted_reads[[1]]) * sign(x_min) * height/5 + y_lev, col=col)
2295 883439d6 Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[2]]), sd(flatted_reads[[2]])) * length(flatted_reads[[2]]) * -1 * sign(x_min) * height/5 + y_lev, col=col)
2296 6b5a528c Florent Chuffart
	          }
2297 6b5a528c Florent Chuffart
	          if (plot_gaussian_unified_reads ) {
2298 6e0010bc Florent Chuffart
							lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(flatted_reads[[3]]), sd(flatted_reads[[3]])) * length(flatted_reads[[3]]) * height/5 + y_lev, col=col, lty=1)
2299 6b5a528c Florent Chuffart
	          }
2300 6b5a528c Florent Chuffart
	          if (plot_ellipse_nucs) {
2301 6b5a528c Florent Chuffart
				      # require(plotrix)
2302 55c1cdff Florent Chuffart
	  	 				draw.ellipse(sign(x_min) * nuc$center + shift, y_lev, nuc$width/2, total_involved_reads/nuc$width * height/5, border=col)
2303 6b5a528c Florent Chuffart
						}
2304 6b5a528c Florent Chuffart
		      }
2305 6b5a528c Florent Chuffart
		    } else {
2306 6b5a528c Florent Chuffart
		      print("WARNING! No nucs to print.")
2307 6b5a528c Florent Chuffart
		    }
2308 6b5a528c Florent Chuffart
			}
2309 6b5a528c Florent Chuffart
	  }
2310 55c1cdff Florent Chuffart
2311 6b5a528c Florent Chuffart
	  # Plot wp nucs
2312 9fdbfada Florent Chuffart
		if ((plot_wp_nucs_4_nonmnase | sample$marker == "Mnase_Seq") & (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs | plot_chain)) {
2313 6b5a528c Florent Chuffart
			if (samples[[1]]$marker == "Mnase_Seq") {
2314 55c1cdff Florent Chuffart
				if (is.null(aggregated_intra_strain_nucs)) {
2315 5bfac5a3 Florent Chuffart
	  			wp_nucs = aggregate_intra_strain_nucs(samples)[[1]]
2316 6b5a528c Florent Chuffart
				} else {
2317 6b5a528c Florent Chuffart
					wp_nucs = aggregated_intra_strain_nucs[[replicate_rank]]
2318 6b5a528c Florent Chuffart
				}
2319 6b5a528c Florent Chuffart
		  } else {
2320 6b5a528c Florent Chuffart
  			wp_nucs = replicates_wp_nucs[[replicate_rank-2]]
2321 6b5a528c Florent Chuffart
		  }
2322 729c934e Florent Chuffart
      if (plot_chain) {
2323 55c1cdff Florent Chuffart
        tf_nucs = lapply(wp_nucs, function(nuc) {
2324 729c934e Florent Chuffart
          bar = apply(t(nuc$nucs), 2, function(tmp_nuc){
2325 729c934e Florent Chuffart
            tmp_nuc = tmp_nuc[[1]]
2326 729c934e Florent Chuffart
            tmp_nuc$inputs = NULL
2327 729c934e Florent Chuffart
            tmp_nuc$original_reads = NULL
2328 729c934e Florent Chuffart
            tmp_nuc$wp = nuc$wp
2329 729c934e Florent Chuffart
            # print(tmp_nuc)
2330 729c934e Florent Chuffart
            return(tmp_nuc)
2331 55c1cdff Florent Chuffart
          })
2332 dc58ee6c Florent Chuffart
          return(do.call(rbind, bar))
2333 dc58ee6c Florent Chuffart
        })
2334 729c934e Florent Chuffart
        tf_nucs = data.frame(do.call(rbind, tf_nucs))
2335 55c1cdff Florent Chuffart
        tmp_x = (unlist(tf_nucs$lower_bound) + unlist(tf_nucs$upper_bound)) / 2
2336 dc58ee6c Florent Chuffart
        tmp_y =  y_min + (unlist(tf_nucs$track) - 0.5) * delta_y/nb_rank
2337 dc58ee6c Florent Chuffart
        tmp_y_prev = tmp_y[-length(tmp_y)]
2338 dc58ee6c Florent Chuffart
        tmp_y_next = tmp_y[-1]
2339 dc58ee6c Florent Chuffart
        tmp_y_inter = (tmp_y_prev + tmp_y_next) / 2
2340 dc58ee6c Florent Chuffart
        tmp_track = unlist(tf_nucs$track)
2341 dc58ee6c Florent Chuffart
        tmp_track_prev = tmp_track[-length(tmp_track)]
2342 dc58ee6c Florent Chuffart
        tmp_track_next = tmp_track[-1]
2343 6e0010bc Florent Chuffart
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2344 e5603c3f Florent Chuffart
        if (is.null(config$TRACK_LLR_OFFSET)) {
2345 e5603c3f Florent Chuffart
          config$TRACK_LLR_OFFSET = 0
2346 6e0010bc Florent Chuffart
        }
2347 e5603c3f Florent Chuffart
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
2348 dc58ee6c Florent Chuffart
        tmp_x_prev = tmp_x[-length(tmp_x)]
2349 dc58ee6c Florent Chuffart
        tmp_x_next = tmp_x[-1]
2350 dc58ee6c Florent Chuffart
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2351 dc58ee6c Florent Chuffart
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2352 7e2d37e1 Florent Chuffart
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
2353 55c1cdff Florent Chuffart
        new_tmp_x = c()
2354 55c1cdff Florent Chuffart
        new_tmp_y = c()
2355 dc58ee6c Florent Chuffart
        index_odd = 1:length(tmp_x) * 2 - 1
2356 55c1cdff Florent Chuffart
        index_even = (1:(length(tmp_x) - 1)) * 2
2357 55c1cdff Florent Chuffart
        new_tmp_x[index_odd] = tmp_x
2358 dc58ee6c Florent Chuffart
        new_tmp_y[index_odd] = tmp_y
2359 dc58ee6c Florent Chuffart
        new_tmp_x[index_even] = tmp_x_inter
2360 dc58ee6c Florent Chuffart
        new_tmp_y[index_even] = tmp_y_inter
2361 ec2936ea Florent Chuffart
        lines(new_tmp_x , new_tmp_y, lwd=2)
2362 dc58ee6c Florent Chuffart
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2363 ec2936ea Florent Chuffart
        points(tmp_x, tmp_y, cex=4, lwd=2)
2364 dc58ee6c Florent Chuffart
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2365 e5603c3f Florent Chuffart
        if (is.null(config$LEGEND_LLR_POS)) {
2366 6e0010bc Florent Chuffart
          pos = 2
2367 6e0010bc Florent Chuffart
        } else {
2368 e5603c3f Florent Chuffart
          pos = config$LEGEND_LLR_POS
2369 6e0010bc Florent Chuffart
        }
2370 7e2d37e1 Florent Chuffart
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2371 7e2d37e1 Florent Chuffart
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
2372 37b11e9b Florent Chuffart
        
2373 729c934e Florent Chuffart
      }
2374 55c1cdff Florent Chuffart
2375 9fdbfada Florent Chuffart
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
2376 729c934e Florent Chuffart
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
2377 9fdbfada Florent Chuffart
        strain = samples[[1]]$strain
2378 cc54d805 Florent Chuffart
        nb_tracks = length(replicates[[1]])
2379 cc54d805 Florent Chuffart
        # print(paste("**************"), nb_tracks)
2380 d973538c Florent Chuffart
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
2381 9fdbfada Florent Chuffart
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
2382 9fdbfada Florent Chuffart
2383 9fdbfada Florent Chuffart
        if (plot_fuzzy_nucs) {
2384 9fdbfada Florent Chuffart
          fuzzy_map = fuzzy_maps[[strain]]
2385 4016229d Florent Chuffart
          if (!is.null(fuzzy_map)) {
2386 4016229d Florent Chuffart
            if (nrow(fuzzy_map) > 0) {
2387 4016229d Florent Chuffart
              rect(sign(x_min) * fuzzy_map$lower_bound + shift, y_min, sign(x_min) * fuzzy_map$upper_bound + shift, y_max, col=adjustcolor(3, alpha.f = 0.1), border=1)                    
2388 4016229d Florent Chuffart
            }
2389 d492bca4 Florent Chuffart
          }
2390 9fdbfada Florent Chuffart
        } 
2391 9fdbfada Florent Chuffart
2392 9fdbfada Florent Chuffart
        if (plot_wp_nucs) {
2393 9fdbfada Florent Chuffart
    			for (wp_nuc in wp_nucs) {
2394 9fdbfada Florent Chuffart
    				if (wp_nuc$wp){
2395 9fdbfada Florent Chuffart
    					rect(sign(x_min) * wp_nuc$lower_bound + shift, y_min, sign(x_min) * wp_nuc$upper_bound + shift, y_max, col=adjustcolor(2, alpha.f = 0.1), border=1)
2396 9fdbfada Florent Chuffart
    					if (plot_wp_nuc_model) {
2397 9fdbfada Florent Chuffart
      					all_original_reads = c()
2398 9fdbfada Florent Chuffart
      					for(initial_nuc in wp_nuc$nucs) {
2399 9fdbfada Florent Chuffart
      						all_original_reads = c(all_original_reads, initial_nuc$original_reads)
2400 9fdbfada Florent Chuffart
      					}
2401 9fdbfada Florent Chuffart
      					delta_x = wp_nuc$lower_bound:wp_nuc$upper_bound
2402 9fdbfada Florent Chuffart
    					  lines(sign(x_min) * delta_x + shift, dnorm(delta_x, mean(all_original_reads), sd(all_original_reads)) * length(all_original_reads) * height/5 + y_min, col=1)
2403 9fdbfada Florent Chuffart
    				  }
2404 729c934e Florent Chuffart
  				  }
2405 729c934e Florent Chuffart
  				}
2406 729c934e Florent Chuffart
  			}
2407 729c934e Florent Chuffart
      }
2408 6b5a528c Florent Chuffart
		}
2409 5bfac5a3 Florent Chuffart
	}
2410 55c1cdff Florent Chuffart
2411 5bfac5a3 Florent Chuffart
	if (plot_common_nucs) {
2412 9fdbfada Florent Chuffart
    if (is.null(aligned_inter_strain_nucs)) {
2413 9fdbfada Florent Chuffart
      aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
2414 b69443b5 Florent Chuffart
      if (!is.null(aligned_inter_strain_nucs)) {
2415 4016229d Florent Chuffart
        aligned_inter_strain_nucs$cur_index = "foo" 
2416 b69443b5 Florent Chuffart
      }
2417 9fdbfada Florent Chuffart
    }
2418 55c1cdff Florent Chuffart
2419 9fdbfada Florent Chuffart
    #Plot common wp nucs
2420 9fdbfada Florent Chuffart
    mid_y = shift = x_min = x_max = nb_rank = base = ylim = ymin = y_max = delta_y = list()
2421 9fdbfada Florent Chuffart
    for (replicate_rank in 1:length(replicates)) {
2422 9fdbfada Florent Chuffart
      nb_rank[[replicate_rank]] = length(samples)
2423 9fdbfada Florent Chuffart
      base[[replicate_rank]] = (replicate_rank-1) * height * nb_rank[[replicate_rank]]
2424 9fdbfada Florent Chuffart
      ylim[[replicate_rank]] = c(base[[replicate_rank]], base[[replicate_rank]] + height * nb_rank[[replicate_rank]])
2425 9fdbfada Florent Chuffart
      y_min[[replicate_rank]] = min(ylim[[replicate_rank]])
2426 9fdbfada Florent Chuffart
      y_max[[replicate_rank]] = max(ylim[[replicate_rank]])
2427 9fdbfada Florent Chuffart
      delta_y[[replicate_rank]] = y_max[[replicate_rank]] - y_min[[replicate_rank]]
2428 9fdbfada Florent Chuffart
      mid_y[[replicate_rank]] = (y_max[[replicate_rank]] + y_min[[replicate_rank]]) / 2
2429 9fdbfada Florent Chuffart
2430 9fdbfada Florent Chuffart
      samples = replicates[[replicate_rank]]
2431 9fdbfada Florent Chuffart
      for (sample_rank in 1:length(samples)) {
2432 9fdbfada Florent Chuffart
        sample = samples[[sample_rank]]
2433 9fdbfada Florent Chuffart
        y_lev = y_min[[replicate_rank]] + (sample_rank - 0.5) * delta_y[[replicate_rank]]/nb_rank[[replicate_rank]]
2434 9fdbfada Florent Chuffart
        if (sample$roi[["begin"]] < sample$roi[["end"]]) {
2435 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = sample$roi[["begin"]]
2436 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = sample$roi[["end"]]
2437 9fdbfada Florent Chuffart
        } else {
2438 9fdbfada Florent Chuffart
          x_min[[replicate_rank]] = - sample$roi[["begin"]]
2439 9fdbfada Florent Chuffart
          x_max[[replicate_rank]] = - sample$roi[["end"]]
2440 6b5a528c Florent Chuffart
        }
2441 9fdbfada Florent Chuffart
        shift[[replicate_rank]] = x_min[[1]] - x_min[[replicate_rank]]
2442 55c1cdff Florent Chuffart
      }
2443 9fdbfada Florent Chuffart
    }
2444 b69443b5 Florent Chuffart
    print(aligned_inter_strain_nucs)
2445 b69443b5 Florent Chuffart
    if (!is.null(aligned_inter_strain_nucs)) {
2446 b69443b5 Florent Chuffart
      for (inter_strain_nuc_index in 1:length(aligned_inter_strain_nucs[,1])) {
2447 b69443b5 Florent Chuffart
        inter_strain_nuc = aligned_inter_strain_nucs[inter_strain_nuc_index,]
2448 b69443b5 Florent Chuffart
        tmp_xs = tmp_ys = c()
2449 b69443b5 Florent Chuffart
        for (replicate_rank in 1:length(replicates)) {
2450 b69443b5 Florent Chuffart
          samples = replicates[[replicate_rank]]
2451 b69443b5 Florent Chuffart
          strain = samples[[1]]$strain
2452 b69443b5 Florent Chuffart
          tmp_xs = c(tmp_xs, sign(x_min[[replicate_rank]]) * (inter_strain_nuc[[paste("lower_bound_",strain,sep="")]] + inter_strain_nuc[[paste("upper_bound_",strain,sep="")]])/2 + shift[[replicate_rank]])
2453 b69443b5 Florent Chuffart
          tmp_ys = c(tmp_ys, mid_y[[replicate_rank]])
2454 b69443b5 Florent Chuffart
        }
2455 37b11e9b Florent Chuffart
        lines(tmp_xs, tmp_ys, col=2, type="b", lwd=dev.size()[1]*100/(x_max[[1]]-x_min[[1]])*8, cex=dev.size()[1]*400/(x_max[[1]]-x_min[[1]]), pch=19)
2456 55c1cdff Florent Chuffart
      }
2457 9fdbfada Florent Chuffart
    }
2458 9fdbfada Florent Chuffart
    
2459 9fdbfada Florent Chuffart
    if (plot_common_unrs) {
2460 9fdbfada Florent Chuffart
      combi = c(replicates[[1]][[1]]$strain, replicates[[2]][[1]]$strain)
2461 9fdbfada Florent Chuffart
      roi = as.list(samples[[1]]$roi)
2462 4016229d Florent Chuffart
      cur_index = "foo"
2463 9fdbfada Florent Chuffart
      common_nuc_results = list()
2464 9fdbfada Florent Chuffart
      common_nuc_results[[paste(combi[1], combi[2], sep="_")]] = aligned_inter_strain_nucs
2465 4016229d Florent Chuffart
      unrs = get_unrs(combi, roi, cur_index, wp_maps, fuzzy_maps, common_nuc_results, config = config) 
2466 ec2936ea Florent Chuffart
      rect(sign(x_min[[1]]) * unrs$lower_bound + shift[[1]], y_min[[1]], sign(x_min[[1]]) * unrs$upper_bound + shift[[1]], y_max[[2]], border=4, lwd=10, col=adjustcolor(4, alpha.f = 0.05))        
2467 9fdbfada Florent Chuffart
    }
2468 9fdbfada Florent Chuffart
2469 6b5a528c Florent Chuffart
	}
2470 6b5a528c Florent Chuffart
  return(returned_list)
2471 6b5a528c Florent Chuffart
}
2472 6b5a528c Florent Chuffart
2473 6b5a528c Florent Chuffart
2474 6b5a528c Florent Chuffart
2475 9fdbfada Florent Chuffart
get_intra_strain_fuzzy = function(# Compute the fuzzy list for a given strain.
2476 9fdbfada Florent Chuffart
### This function grabs the nucleosomes detxted by template_filter that have been rejected bt aggregate_intra_strain_nucs as well positions.
2477 0a13b5e3 Florent Chuffart
wp_map, ##<< Well positionned nucleosomes map.
2478 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2479 9fdbfada Florent Chuffart
strain, ##<< The strain we want to extracvt the fuzzy map.
2480 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable.
2481 9fdbfada Florent Chuffart
) {
2482 9fdbfada Florent Chuffart
  fuzzy_map = wp_map[wp_map$wp==0, ]
2483 9fdbfada Florent Chuffart
  if (nrow(fuzzy_map) > 0) {
2484 9fdbfada Florent Chuffart
    fuzzy_map = substract_region(fuzzy_map, wp_map[wp_map$wp==1,])
2485 9fdbfada Florent Chuffart
    if (!is.null(fuzzy_map)) {
2486 9fdbfada Florent Chuffart
      fuzzy_map = union_regions(fuzzy_map)
2487 9fdbfada Florent Chuffart
      fuzzy_map = crop_fuzzy(fuzzy_map, roi, strain, config)
2488 9fdbfada Florent Chuffart
    }
2489 9fdbfada Florent Chuffart
  }
2490 9fdbfada Florent Chuffart
  return(fuzzy_map)
2491 9fdbfada Florent Chuffart
}
2492 9fdbfada Florent Chuffart
2493 9fdbfada Florent Chuffart
2494 9fdbfada Florent Chuffart
2495 9fdbfada Florent Chuffart
2496 9fdbfada Florent Chuffart
2497 9fdbfada Florent Chuffart
get_unrs = function(# Compute the unaligned nucleosomal regions (UNRs).
2498 9fdbfada Florent Chuffart
### This function aggregate non common wp nucs for each strain and substract common wp nucs. It does not take care about the size of the resulting UNR. It will be take into account in the count read part og the pipeline.
2499 9fdbfada Florent Chuffart
combi, ##<< The strain combination to consider.
2500 9fdbfada Florent Chuffart
roi, ##<< The region of interest.
2501 4016229d Florent Chuffart
cur_index, ##<< The region of interest index.
2502 9fdbfada Florent Chuffart
wp_maps, ##<< Well positionned nucleosomes maps.
2503 9fdbfada Florent Chuffart
fuzzy_maps, ##<< Fuzzy nucleosomes maps.
2504 9fdbfada Florent Chuffart
common_nuc_results, ##<< Common wp nuc maps
2505 9fdbfada Florent Chuffart
config=NULL ##<< GLOBAL config variable
2506 9fdbfada Florent Chuffart
) {
2507 4016229d Florent Chuffart
  # print(cur_index)
2508 9fdbfada Florent Chuffart
2509 9fdbfada Florent Chuffart
  tmp_combi_key = paste(combi[1], combi[2], sep="_")
2510 9fdbfada Florent Chuffart
  tmp_common_nucs = common_nuc_results[[tmp_combi_key]]
2511 4016229d Florent Chuffart
  tmp_common_nucs = tmp_common_nucs[tmp_common_nucs$cur_index == cur_index, ]
2512 9fdbfada Florent Chuffart
2513 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from", combi[1]))
2514 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[1]]]
2515 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2516 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2517 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2518 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2519 9fdbfada Florent Chuffart
  # Let's go!
2520 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2521 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2522 9fdbfada Florent Chuffart
      return(NULL)
2523 9fdbfada Florent Chuffart
    } else {
2524 9fdbfada Florent Chuffart
      return (index_nuc)
2525 9fdbfada Florent Chuffart
    }
2526 9fdbfada Florent Chuffart
  }))
2527 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2528 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2529 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2530 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2531 9fdbfada Florent Chuffart
  }
2532 9fdbfada Florent Chuffart
  tmp_unr_nucs_1 = tmp_unr
2533 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_1[,1]) == 0) {return(NULL)}
2534 9fdbfada Florent Chuffart
  agg_unr_1 = tmp_unr_nucs_1
2535 9fdbfada Florent Chuffart
2536 f0703205 Florent Chuffart
  # print(paste("Dealing with unr from ", combi[2]))
2537 9fdbfada Florent Chuffart
  tmp_fuzzy = fuzzy_maps[[combi[2]]]
2538 4016229d Florent Chuffart
  tmp_fuzzy = tmp_fuzzy[tmp_fuzzy$cur_index == cur_index, ]
2539 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2540 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2541 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2542 9fdbfada Florent Chuffart
  # Let's go!
2543 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2544 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2545 9fdbfada Florent Chuffart
      return(NULL)
2546 9fdbfada Florent Chuffart
    } else {
2547 9fdbfada Florent Chuffart
      return (index_nuc)
2548 9fdbfada Florent Chuffart
    }
2549 9fdbfada Florent Chuffart
  }))
2550 9fdbfada Florent Chuffart
  tmp_unaligned_wp = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2551 9fdbfada Florent Chuffart
  tmp_unr = rbind(tmp_fuzzy,tmp_unaligned_wp[,1:4])
2552 9fdbfada Florent Chuffart
  if (length(tmp_unr) != 0) {
2553 9fdbfada Florent Chuffart
    tmp_unr = union_regions(tmp_unr)
2554 9fdbfada Florent Chuffart
  }
2555 9fdbfada Florent Chuffart
  tmp_unr_nucs_2 = tmp_unr
2556 9fdbfada Florent Chuffart
  if (length(tmp_unr_nucs_2[,1]) == 0) {return(NULL)}
2557 9fdbfada Florent Chuffart
  agg_unr_2 = crop_fuzzy(tmp_unr_nucs_2, roi, combi[2], config)
2558 4016229d Florent Chuffart
  tr_agg_unr_2 = translate_regions(agg_unr_2, combi, cur_index, roi=roi, config=config)
2559 9fdbfada Florent Chuffart
  tr_agg_unr_2 = union_regions(tr_agg_unr_2)
2560 9fdbfada Florent Chuffart
2561 f0703205 Florent Chuffart
  # print("Dealing with unr from both...")
2562 9fdbfada Florent Chuffart
  all_unr = union_regions(rbind(agg_unr_1, tr_agg_unr_2))
2563 9fdbfada Florent Chuffart
2564 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[1]))
2565 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[1]]]
2566 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2567 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2568 9fdbfada Florent Chuffart
  # Let's go!
2569 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2570 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[1], sep="_")]]) {
2571 9fdbfada Florent Chuffart
      return (index_nuc)
2572 9fdbfada Florent Chuffart
    } else {
2573 9fdbfada Florent Chuffart
      return(NULL)
2574 9fdbfada Florent Chuffart
    }
2575 9fdbfada Florent Chuffart
  }))
2576 9fdbfada Florent Chuffart
  wp_nucs_1 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2577 9fdbfada Florent Chuffart
  
2578 f0703205 Florent Chuffart
  # print(paste("Dealing with wp from", combi[2]))
2579 9fdbfada Florent Chuffart
  tmp_wp = wp_maps[[combi[2]]]
2580 9fdbfada Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$wp==1,]
2581 4016229d Florent Chuffart
  tmp_wp = tmp_wp[tmp_wp$cur_index == cur_index, ]
2582 9fdbfada Florent Chuffart
  # Let's go!
2583 9fdbfada Florent Chuffart
  tmp_index = unlist(apply(t(tmp_wp$index_nuc), 2, function(index_nuc) {
2584 9fdbfada Florent Chuffart
    if (index_nuc %in% tmp_common_nucs[[paste("index_nuc", combi[2], sep="_")]]) {
2585 9fdbfada Florent Chuffart
      return (index_nuc)
2586 9fdbfada Florent Chuffart
    } else {
2587 9fdbfada Florent Chuffart
      return(NULL)
2588 9fdbfada Florent Chuffart
    }
2589 9fdbfada Florent Chuffart
  }))
2590 9fdbfada Florent Chuffart
  wp_nucs_2 = tmp_wp[tmp_wp$index_nuc %in% tmp_index, ]
2591 9fdbfada Florent Chuffart
  wp_nucs_2 = crop_fuzzy(wp_nucs_2, roi, combi[2], config)
2592 9fdbfada Florent Chuffart
  if (nrow(wp_nucs_2) == 0) {
2593 9fdbfada Florent Chuffart
    tr_wp_nucs_2 = wp_nucs_2
2594 9fdbfada Florent Chuffart
  } else {
2595 4016229d Florent Chuffart
    tr_wp_nucs_2 = translate_regions(wp_nucs_2, combi, cur_index, roi=roi, config=config)      
2596 9fdbfada Florent Chuffart
  }
2597 9fdbfada Florent Chuffart
2598 f0703205 Florent Chuffart
  # print("Dealing with wp from both...")
2599 9fdbfada Florent Chuffart
  all_wp = union_regions(rbind(wp_nucs_1[,1:4], tr_wp_nucs_2))
2600 9fdbfada Florent Chuffart
2601 f0703205 Florent Chuffart
  # print("Dealing with unr and wp...")
2602 9fdbfada Florent Chuffart
  non_inter_unr = substract_region(all_unr, all_wp)
2603 9fdbfada Florent Chuffart
  non_inter_unr = crop_fuzzy(non_inter_unr, roi, combi[1], config)
2604 9fdbfada Florent Chuffart
  if (is.null(non_inter_unr)) { return(NULL) }
2605 9fdbfada Florent Chuffart
  non_inter_unr$len = non_inter_unr$upper_bound - non_inter_unr$lower_bound
2606 37b11e9b Florent Chuffart
  min_unr_width = 75
2607 37b11e9b Florent Chuffart
  non_inter_unr = non_inter_unr[non_inter_unr$len >= min_unr_width,]
2608 9fdbfada Florent Chuffart
2609 9fdbfada Florent Chuffart
  non_inter_unr$index_nuc = 1:length(non_inter_unr[,1])
2610 9fdbfada Florent Chuffart
  return (non_inter_unr)
2611 9fdbfada Florent Chuffart
}
2612 9fdbfada Florent Chuffart
2613 9fdbfada Florent Chuffart