Révision d973538c
b/doc/Chuffart_NM2_sweave/Chuffart_NM2_sweave.Rnw | ||
---|---|---|
41 | 41 |
config = fromJSON(paste(readLines(json_conf_file), collapse="")) |
42 | 42 |
|
43 | 43 |
# Read sample file |
44 |
all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
|
|
44 |
all_samples = get_content(config$CSV_SAMPLE_FILE, "csv", sep=";", head=TRUE, stringsAsFactors=FALSE)
|
|
45 | 45 |
|
46 | 46 |
# Remove samples that seem to be erroneous for some manipulation efficiency reasons e.g. PCR |
47 | 47 |
all_samples = all_samples[!(all_samples$id %in% c(33,45,48,55)), ] |
... | ... | |
705 | 705 |
print(nb_iso/sum(check_iso_snep$snep_index)) |
706 | 706 |
|
707 | 707 |
|
708 |
wpunr_mnase = read.table( paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_wp_mnase.tab",sep=""), header=TRUE) |
|
708 |
wpunr_mnase = read.table( paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_wpunr_mnase.tab",sep=""), header=TRUE)
|
|
709 | 709 |
head(wpunr_mnase) |
710 | 710 |
|
711 | 711 |
sf = read.table(paste(config$RESULTS_DIR,"/full/size_factors.tab",sep=""), header=TRUE) |
b/doc/Chuffart_NM2_workdir/src/current/configurator.py | ||
---|---|---|
3 | 3 |
import json |
4 | 4 |
|
5 | 5 |
CSV_SAMPLE_FILE = None |
6 |
"""Path to cvs file that contains sample information."""
|
|
6 |
"""Path to csv file that contains sample information."""
|
|
7 | 7 |
if __name__ == "__main__": |
8 | 8 |
CSV_SAMPLE_FILE = "data/samples.csv" |
9 | 9 |
|
... | ... | |
98 | 98 |
] |
99 | 99 |
} |
100 | 100 |
|
101 |
NEUTRAL_TRANSLATE_CUR = None |
|
102 |
"""Deactivate the genome translation facilities""" |
|
103 |
if __name__ == "__main__": |
|
104 |
NEUTRAL_TRANSLATE_CUR = False |
|
105 |
|
|
106 |
|
|
101 | 107 |
READ_LENGTH = None |
102 | 108 |
"""Length of Illumina reads.""" |
103 | 109 |
if __name__ == "__main__": |
b/doc/Chuffart_NM2_workdir/src/current/extract_maps.R | ||
---|---|---|
46 | 46 |
} |
47 | 47 |
|
48 | 48 |
print("Running bot engine...") |
49 |
run_engine(tasks, extract_maps, debug = substr(Sys.info()[["nodename"]],1,7) == "stainer", rm_starter = TRUE, log_dir = "log", nb_proc=NULL)
|
|
49 |
run_engine(tasks, extract_maps, debug = substr(Sys.info()[["nodename"]],1,7) == "stainer", rm_starter = FALSE, log_dir = "log", nb_proc=NULL)
|
|
50 | 50 |
|
51 | 51 |
|
52 | 52 |
|
... | ... | |
107 | 107 |
for (strain in strains) { |
108 | 108 |
print(paste("Collecting mpas for", strain, "...")) |
109 | 109 |
partial_strain_maps = aggregated_intra_strain_nucs[[strain]][[1]] |
110 |
tmp_nuc_map = flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index) |
|
110 |
print(strain) |
|
111 |
nb_tracks = sum(all_samples$strain == strain & all_samples$marker == "Mnase_Seq") |
|
112 |
|
|
113 |
tmp_nuc_map = flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index, nb_tracks) |
|
111 | 114 |
|
112 | 115 |
wp_indexes = which(tmp_nuc_map$wp == 1) |
113 | 116 |
tmp_wp_llr = apply(t(wp_indexes), 2, function(wp_index){ |
114 |
tmp_wp = partial_strain_maps[[wp_index]] |
|
115 |
res = llr_score_nvecs(list(tmp_wp$nucs[[1]]$original_reads, tmp_wp$nucs[[2]]$original_reads, tmp_wp$nucs[[3]]$original_reads)) |
|
117 |
tmp_wp = partial_strain_maps[[wp_index]] |
|
118 |
l = lapply(tmp_wp$nucs, function(tmp_wp_nucs){ |
|
119 |
tmp_wp_nucs$original_reads |
|
120 |
}) |
|
121 |
res = llr_score_nvecs(l) |
|
116 | 122 |
return(res) |
117 | 123 |
}) |
118 | 124 |
|
119 | 125 |
tmp_dyad_shift = apply(t(wp_indexes), 2, function(wp_index){ |
120 | 126 |
tmp_wp = partial_strain_maps[[wp_index]] |
121 |
ds = max(c(tmp_wp$nucs[[1]]$center, tmp_wp$nucs[[2]]$center, tmp_wp$nucs[[3]]$center)) - min(c(tmp_wp$nucs[[1]]$center, tmp_wp$nucs[[2]]$center, tmp_wp$nucs[[3]]$center)) |
|
127 |
c = sapply(tmp_wp$nucs, function(tmp_wp_nucs){ |
|
128 |
tmp_wp_nucs$center |
|
129 |
}) |
|
130 |
ds = max(c) - min(c) |
|
122 | 131 |
return(ds) |
123 | 132 |
}) |
124 | 133 |
|
b/doc/Chuffart_NM2_workdir/src/current/headers.R | ||
---|---|---|
5 | 5 |
json_conf_file = "src/current/nucleominer_config.json" |
6 | 6 |
config = fromJSON(paste(readLines(json_conf_file), collapse="")) |
7 | 7 |
|
8 |
all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
|
|
8 |
all_samples = get_content(config$CSV_SAMPLE_FILE, "csv", sep=";", head=TRUE, stringsAsFactors=FALSE)
|
|
9 | 9 |
all_samples = all_samples[!(all_samples$id %in% c(33,45,48,55)), ] |
10 | 10 |
|
11 | 11 |
markers = unique(all_samples$marker) |
b/doc/sphinx_doc/Makefile | ||
---|---|---|
112 | 112 |
|
113 | 113 |
text: |
114 | 114 |
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text |
115 |
cat build/text/index.txt build/text/readme.txt build/text/tuto.txt ../../README |
|
115 |
cat build/text/index.txt build/text/readme.txt build/text/tuto.txt > ../../README
|
|
116 | 116 |
@echo |
117 | 117 |
@echo "Build finished. The text files are in $(BUILDDIR)/text." |
118 | 118 |
|
b/doc/sphinx_doc/conf.py | ||
---|---|---|
50 | 50 |
# built documents. |
51 | 51 |
# |
52 | 52 |
# The short X.Y version. |
53 |
version = '2.3.46'
|
|
53 |
version = '2.3.47'
|
|
54 | 54 |
# The full version, including alpha/beta/rc tags. |
55 |
release = '2.3.46'
|
|
55 |
release = '2.3.47'
|
|
56 | 56 |
|
57 | 57 |
# The language for content autogenerated by Sphinx. Refer to documentation |
58 | 58 |
# for a list of supported languages. |
b/doc/sphinx_doc/rref.rst | ||
---|---|---|
690 | 690 |
:: |
691 | 691 |
|
692 | 692 |
filter_tf_inputs(inputs, chr, x_min, x_max, nuc_width = 160, |
693 |
only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE) |
|
693 |
only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE, |
|
694 |
USE_DPLYR = TRUE) |
|
694 | 695 |
|
695 | 696 |
Arguments |
696 | 697 |
~~~~~~~~~ |
... | ... | |
727 | 728 |
|
728 | 729 |
Does it filter for plot coverage? |
729 | 730 |
|
731 |
``USE_DPLYR`` |
|
732 |
|
|
733 |
Use dplyr lib to filter reads. |
|
734 |
|
|
730 | 735 |
Value |
731 | 736 |
~~~~~ |
732 | 737 |
|
... | ... | |
814 | 819 |
|
815 | 820 |
:: |
816 | 821 |
|
817 |
flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index) |
|
822 |
flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index, |
|
823 |
nb_tracks = 3) |
|
818 | 824 |
|
819 | 825 |
Arguments |
820 | 826 |
~~~~~~~~~ |
... | ... | |
827 | 833 |
|
828 | 834 |
the index of the roi involved |
829 | 835 |
|
836 |
``nb_tracks`` |
|
837 |
|
|
838 |
the number of replicates |
|
839 |
|
|
830 | 840 |
Value |
831 | 841 |
~~~~~ |
832 | 842 |
|
... | ... | |
1251 | 1261 |
+---------------+---------------------------------------------------+ |
1252 | 1262 |
| Author: | Florent Chuffart | |
1253 | 1263 |
+---------------+---------------------------------------------------+ |
1254 |
| Version: | 2.3.46 |
|
|
1264 |
| Version: | 2.3.47 |
|
|
1255 | 1265 |
+---------------+---------------------------------------------------+ |
1256 | 1266 |
| License: | CeCILL | |
1257 | 1267 |
+---------------+---------------------------------------------------+ |
1258 | 1268 |
| Title: | nm | |
1259 | 1269 |
+---------------+---------------------------------------------------+ |
1260 |
| Depends: | seqinr, plotrix, DESeq, cachecache |
|
|
1270 |
| Depends: | seqinr, plotrix, DESeq, cachecache, dplyr |
|
|
1261 | 1271 |
+---------------+---------------------------------------------------+ |
1262 | 1272 |
|
1263 | 1273 |
Author(s) |
b/src/DESCRIPTION | ||
---|---|---|
1 | 1 |
Package: nucleominer |
2 | 2 |
Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr> |
3 | 3 |
Author: Florent Chuffart |
4 |
Version: 2.3.46
|
|
4 |
Version: 2.3.47
|
|
5 | 5 |
License: CeCILL |
6 | 6 |
Title: nm |
7 |
Depends: seqinr, plotrix, DESeq, cachecache |
|
7 |
Depends: seqinr, plotrix, DESeq, cachecache, dplyr
|
|
8 | 8 |
Description: It provides a set of useful functions allowing to perform quantitative analysis of nucleosomal epigenome. |
b/src/R/nucleominer.R | ||
---|---|---|
153 | 153 |
nuc_width = 160, ##<< Nucleosome width. |
154 | 154 |
only_f = FALSE, ##<< Filter only F reads. |
155 | 155 |
only_r = FALSE, ##<< Filter only R reads. |
156 |
filter_for_coverage = FALSE ##<< Does it filter for plot coverage? |
|
156 |
filter_for_coverage = FALSE, ##<< Does it filter for plot coverage? |
|
157 |
USE_DPLYR = TRUE ##<< Use dplyr lib to filter reads. |
|
157 | 158 |
) { |
158 |
if (only_f) { |
|
159 |
inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,] |
|
160 |
} else if (only_r) { |
|
161 |
inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,] |
|
162 |
} else { |
|
163 |
inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,] |
|
164 |
} |
|
159 |
n = names(inputs) |
|
160 |
|
|
161 |
if (!USE_DPLYR) { |
|
162 |
if (only_f) { |
|
163 |
inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,] |
|
164 |
} else if (only_r) { |
|
165 |
inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,] |
|
166 |
} else { |
|
167 |
inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,] |
|
168 |
} |
|
169 |
} else { |
|
170 |
names(inputs) = c("chr", "pos", "str", "lev") |
|
171 |
if (only_f) { |
|
172 |
inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width) |
|
173 |
} else if (only_r) { |
|
174 |
inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width) |
|
175 |
} else { |
|
176 |
inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width) |
|
177 |
} |
|
178 |
# if (!filter_for_coverage) { |
|
179 |
# inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3]) |
|
180 |
# inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max) |
|
181 |
# inputs$corrected_inputs_coords = NULL |
|
182 |
# } |
|
183 |
} |
|
184 |
|
|
165 | 185 |
if (!filter_for_coverage) { |
166 |
corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
|
|
167 |
inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
|
|
186 |
corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
|
|
187 |
inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
|
|
168 | 188 |
} |
169 |
return(inputs) |
|
189 |
|
|
190 |
names(inputs_out) = n |
|
191 |
return(inputs_out) |
|
170 | 192 |
### Returns filtered inputs. |
171 | 193 |
} |
172 | 194 |
|
... | ... | |
205 | 227 |
} |
206 | 228 |
return(TRUE) |
207 | 229 |
} |
208 |
store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
|
|
230 |
store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
|
|
209 | 231 |
if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) { |
210 | 232 |
new_cluster$wp = TRUE |
211 | 233 |
center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2 |
... | ... | |
248 | 270 |
} |
249 | 271 |
i = i+1 |
250 | 272 |
} |
273 |
nb_tracks = length(tf_outs) |
|
251 | 274 |
# print(track_readers) |
252 | 275 |
new_cluster = NULL |
253 | 276 |
nb_nucs_in_cluster = 0 |
254 | 277 |
nuc_from_track = c() |
255 |
for (i in 1:length(tf_outs)){
|
|
278 |
for (i in 1:nb_tracks){
|
|
256 | 279 |
nuc_from_track[i] = FALSE |
257 | 280 |
} |
258 | 281 |
# Start clustering |
... | ... | |
297 | 320 |
} else { |
298 | 321 |
if (!is.null(new_cluster)) { |
299 | 322 |
# store old cluster |
300 |
clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
|
|
323 |
clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
|
|
301 | 324 |
} |
302 | 325 |
# Reinit current cluster composition stuff |
303 | 326 |
nb_nucs_in_cluster = 0 |
304 | 327 |
nuc_from_track = c() |
305 |
for (i in 1:length(tf_outs)){
|
|
328 |
for (i in 1:nb_tracks){
|
|
306 | 329 |
nuc_from_track[i] = FALSE |
307 | 330 |
} |
308 | 331 |
# create new cluster |
... | ... | |
331 | 354 |
# store last cluster |
332 | 355 |
if (!is.null(new_cluster)) { |
333 | 356 |
# store old cluster |
334 |
clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
|
|
357 |
clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
|
|
335 | 358 |
} |
336 | 359 |
return(list(clusters, llr_scores)) |
337 | 360 |
### Returns a list of clusterized nucleosomes, and all computed llr scores. |
... | ... | |
364 | 387 |
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output |
365 | 388 |
### This function builds a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function. |
366 | 389 |
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function |
367 |
cur_index ##<< the index of the roi involved |
|
390 |
cur_index, ##<< the index of the roi involved |
|
391 |
nb_tracks=3 ##<< the number of replicates |
|
368 | 392 |
) { |
369 | 393 |
if (length(partial_strain_maps) == 0 ){ |
370 | 394 |
print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." )) |
... | ... | |
386 | 410 |
tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads) |
387 | 411 |
tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs) |
388 | 412 |
if (tmp_nuc$wp) { |
389 |
tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5) |
|
390 |
tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5) |
|
413 |
for (i in 1:(nb_tracks-1)) { |
|
414 |
tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5) |
|
415 |
} |
|
391 | 416 |
} else { |
392 |
tmp_nuc_as_list[["llr_1"]] = NA |
|
393 |
tmp_nuc_as_list[["llr_2"]] = NA |
|
417 |
for (i in 1:(nb_tracks-1)) { |
|
418 |
tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA |
|
419 |
} |
|
394 | 420 |
} |
395 | 421 |
return(tmp_nuc_as_list) |
396 | 422 |
}) |
... | ... | |
1141 | 1167 |
config=NULL, ##<< GLOBAL config variable |
1142 | 1168 |
big_cur=NULL ##<< A larger region than roi, used to filter c2c if needed. |
1143 | 1169 |
) { |
1144 |
strain1 = roi$strain_ref |
|
1145 |
if (strain1 == strain2) { |
|
1146 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
|
1170 |
strain1 = roi$strain_ref |
|
1171 |
# Do something or nothing? |
|
1172 |
if (is.null(config$NEUTRAL_TRANSLATE_CUR)) { |
|
1173 |
config$NEUTRAL_TRANSLATE_CUR = FALSE |
|
1174 |
} |
|
1175 |
if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) { |
|
1176 |
roi$strain_ref = strain2 |
|
1177 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) |
|
1147 | 1178 |
return(roi) |
1148 | 1179 |
} |
1149 |
|
|
1180 |
|
|
1150 | 1181 |
# Extract c2c file |
1151 | 1182 |
if (!is.null(big_cur)) { |
1152 | 1183 |
# Dealing with big_cur |
... | ... | |
1964 | 1995 |
if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) { |
1965 | 1996 |
replicates_wp_nucs[[replicate_rank]] = wp_nucs |
1966 | 1997 |
strain = samples[[1]]$strain |
1967 |
wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo") |
|
1998 |
wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
|
|
1968 | 1999 |
fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config) |
1969 | 2000 |
|
1970 | 2001 |
if (plot_fuzzy_nucs) { |
Formats disponibles : Unified diff