Révision d973538c

b/doc/Chuffart_NM2_sweave/Chuffart_NM2_sweave.Rnw
41 41
  config = fromJSON(paste(readLines(json_conf_file), collapse=""))
42 42

  
43 43
  # Read sample file
44
  all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)  
44
  all_samples = get_content(config$CSV_SAMPLE_FILE, "csv", sep=";", head=TRUE, stringsAsFactors=FALSE)  
45 45
  
46 46
  # Remove samples that seem to be erroneous for some manipulation efficiency reasons, e.g. PCR
47 47
  all_samples = all_samples[!(all_samples$id %in% c(33,45,48,55)), ]
......
705 705
print(nb_iso/sum(check_iso_snep$snep_index))
706 706

  
707 707

  
708
wpunr_mnase = read.table( paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_wp_mnase.tab",sep=""), header=TRUE)
708
wpunr_mnase = read.table( paste(config$RESULTS_DIR, "/full/" ,combi[1],"_",combi[2],"_wpunr_mnase.tab",sep=""), header=TRUE)
709 709
head(wpunr_mnase)
710 710

  
711 711
sf = read.table(paste(config$RESULTS_DIR,"/full/size_factors.tab",sep=""), header=TRUE)        
b/doc/Chuffart_NM2_workdir/src/current/configurator.py
3 3
import json
4 4

  
5 5
CSV_SAMPLE_FILE = None
6
"""Path to cvs file that contains sample information."""
6
"""Path to csv file that contains sample information."""
7 7
if __name__ == "__main__":
8 8
  CSV_SAMPLE_FILE = "data/samples.csv"
9 9

  
......
98 98
    ]
99 99
  }
100 100

  
101
NEUTRAL_TRANSLATE_CUR = None
102
"""Desactivate the genome translation facilities"""
103
if __name__ == "__main__":
104
  NEUTRAL_TRANSLATE_CUR = False
105

  
106

  
101 107
READ_LENGTH = None
102 108
"""Length of Illumina reads."""
103 109
if __name__ == "__main__":
b/doc/Chuffart_NM2_workdir/src/current/extract_maps.R
46 46
}
47 47

  
48 48
print("Running bot engine...")
49
run_engine(tasks, extract_maps, debug = substr(Sys.info()[["nodename"]],1,7) == "stainer", rm_starter = TRUE, log_dir = "log", nb_proc=NULL)
49
run_engine(tasks, extract_maps, debug = substr(Sys.info()[["nodename"]],1,7) == "stainer", rm_starter = FALSE, log_dir = "log", nb_proc=NULL)
50 50

  
51 51

  
52 52

  
......
107 107
			for (strain in strains) {
108 108
				print(paste("Collecting mpas for", strain, "..."))
109 109
				partial_strain_maps = aggregated_intra_strain_nucs[[strain]][[1]]
110
        tmp_nuc_map = flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index)
110
        print(strain)
111
        nb_tracks = sum(all_samples$strain == strain & all_samples$marker == "Mnase_Seq")
112
        
113
        tmp_nuc_map = flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index, nb_tracks)
111 114

  
112 115
        wp_indexes = which(tmp_nuc_map$wp == 1)
113 116
        tmp_wp_llr = apply(t(wp_indexes), 2, function(wp_index){
114
          tmp_wp = partial_strain_maps[[wp_index]]  
115
    			res = llr_score_nvecs(list(tmp_wp$nucs[[1]]$original_reads, tmp_wp$nucs[[2]]$original_reads, tmp_wp$nucs[[3]]$original_reads))
117
          tmp_wp = partial_strain_maps[[wp_index]]
118
          l = lapply(tmp_wp$nucs, function(tmp_wp_nucs){
119
            tmp_wp_nucs$original_reads
120
          })  
121
          res = llr_score_nvecs(l)
116 122
          return(res)
117 123
        })
118 124

  
119 125
        tmp_dyad_shift = apply(t(wp_indexes), 2, function(wp_index){
120 126
          tmp_wp = partial_strain_maps[[wp_index]]  
121
          ds = max(c(tmp_wp$nucs[[1]]$center, tmp_wp$nucs[[2]]$center, tmp_wp$nucs[[3]]$center)) - min(c(tmp_wp$nucs[[1]]$center, tmp_wp$nucs[[2]]$center, tmp_wp$nucs[[3]]$center))
127
          c = sapply(tmp_wp$nucs, function(tmp_wp_nucs){
128
            tmp_wp_nucs$center
129
          })  
130
          ds = max(c) - min(c)
122 131
          return(ds)
123 132
        })
124 133

  
b/doc/Chuffart_NM2_workdir/src/current/headers.R
5 5
json_conf_file = "src/current/nucleominer_config.json"
6 6
config = fromJSON(paste(readLines(json_conf_file), collapse=""))
7 7

  
8
all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
8
all_samples = get_content(config$CSV_SAMPLE_FILE, "csv", sep=";", head=TRUE, stringsAsFactors=FALSE)
9 9
all_samples = all_samples[!(all_samples$id %in% c(33,45,48,55)), ]
10 10

  
11 11
markers = unique(all_samples$marker)
b/doc/sphinx_doc/Makefile
112 112

  
113 113
text:
114 114
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
115
	cat build/text/index.txt build/text/readme.txt build/text/tuto.txt ../../README 
115
	cat build/text/index.txt build/text/readme.txt build/text/tuto.txt > ../../README 
116 116
	@echo
117 117
	@echo "Build finished. The text files are in $(BUILDDIR)/text."
118 118

  
b/doc/sphinx_doc/conf.py
50 50
# built documents.
51 51
#
52 52
# The short X.Y version.
53
version = '2.3.46'
53
version = '2.3.47'
54 54
# The full version, including alpha/beta/rc tags.
55
release = '2.3.46'
55
release = '2.3.47'
56 56

  
57 57
# The language for content autogenerated by Sphinx. Refer to documentation
58 58
# for a list of supported languages.
b/doc/sphinx_doc/rref.rst
690 690
::
691 691

  
692 692
    filter_tf_inputs(inputs, chr, x_min, x_max, nuc_width = 160, 
693
        only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE)
693
        only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE, 
694
        USE_DPLYR = TRUE)
694 695

  
695 696
Arguments
696 697
~~~~~~~~~
......
727 728

  
728 729
Does it filter for plot coverage?
729 730

  
731
``USE_DPLYR``
732

  
733
Use dplyr lib to filter reads.
734

  
730 735
Value
731 736
~~~~~
732 737

  
......
814 819

  
815 820
::
816 821

  
817
    flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index)
822
    flat_aggregated_intra_strain_nucs(partial_strain_maps, cur_index, 
823
        nb_tracks = 3)
818 824

  
819 825
Arguments
820 826
~~~~~~~~~
......
827 833

  
828 834
the index of the roi involved
829 835

  
836
``nb_tracks``
837

  
838
the number of replicates
839

  
830 840
Value
831 841
~~~~~
832 842

  
......
1251 1261
+---------------+---------------------------------------------------+
1252 1262
| Author:       | Florent Chuffart                                  |
1253 1263
+---------------+---------------------------------------------------+
1254
| Version:      | 2.3.46                                            |
1264
| Version:      | 2.3.47                                            |
1255 1265
+---------------+---------------------------------------------------+
1256 1266
| License:      | CeCILL                                            |
1257 1267
+---------------+---------------------------------------------------+
1258 1268
| Title:        | nm                                                |
1259 1269
+---------------+---------------------------------------------------+
1260
| Depends:      | seqinr, plotrix, DESeq, cachecache                |
1270
| Depends:      | seqinr, plotrix, DESeq, cachecache, dplyr         |
1261 1271
+---------------+---------------------------------------------------+
1262 1272

  
1263 1273
Author(s)
b/src/DESCRIPTION
1 1
Package: nucleominer
2 2
Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr>
3 3
Author: Florent Chuffart
4
Version: 2.3.46
4
Version: 2.3.47
5 5
License: CeCILL 
6 6
Title: nm
7
Depends: seqinr, plotrix, DESeq, cachecache
7
Depends: seqinr, plotrix, DESeq, cachecache, dplyr
8 8
Description: It provides a set of useful functions allowing to perform quantitative analysis of nucleosomal epigenome.
b/src/R/nucleominer.R
153 153
nuc_width = 160, ##<< Nucleosome width.
154 154
only_f = FALSE, ##<< Filter only F reads.
155 155
only_r = FALSE, ##<< Filter only R reads.
156
filter_for_coverage = FALSE ##<< Does it filter for plot coverage?
156
filter_for_coverage = FALSE, ##<< Does it filter for plot coverage?
157
USE_DPLYR = TRUE ##<< Use dplyr lib to filter reads.
157 158
) {
158
	if (only_f) {
159
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
160
	} else if (only_r) {
161
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
162
	} else {
163
		inputs = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
164
	}
159
  n = names(inputs)
160

  
161
  if (!USE_DPLYR) {  
162
    if (only_f) {
163
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "F" & inputs[,2] <= x_max + nuc_width,]
164
    } else if (only_r) {
165
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,3] == "R" & inputs[,2] <= x_max + nuc_width,]
166
    } else {
167
      inputs_out = inputs[inputs[,1]==chr & inputs[,2] >= x_min - nuc_width & inputs[,2] <= x_max + nuc_width,]
168
    }
169
  } else {
170
    names(inputs) = c("chr", "pos", "str", "lev")
171
    if (only_f) {
172
      inputs_out = filter(inputs, chr == chr,  pos >= x_min - nuc_width, str == "F", pos <= x_max + nuc_width)
173
    } else if (only_r) {
174
      inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, str == "R" & pos <= x_max + nuc_width)
175
    } else {
176
      inputs_out = filter(inputs, chr == chr, pos >= x_min - nuc_width, pos <= x_max + nuc_width)
177
    }
178
      # if (!filter_for_coverage) {
179
      #   inputs$corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
180
      #   inputs = filter(inputs, chr == chr, corrected_inputs_coords >= x_min, corrected_inputs_coords <= x_max)
181
      #   inputs$corrected_inputs_coords = NULL
182
      # }  
183
  }
184

  
165 185
  if (!filter_for_coverage) {
166
    corrected_inputs_coords = inputs[,2] + nuc_width/2 * sign_from_strand(inputs[,3])
167
    inputs = inputs[inputs[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
186
    corrected_inputs_coords = inputs_out[,2] + nuc_width/2 * sign_from_strand(inputs_out[,3])
187
    inputs_out = inputs_out[inputs_out[,1]==chr & corrected_inputs_coords >= x_min & corrected_inputs_coords <= x_max,]
168 188
  }
169
	return(inputs)
189

  
190
  names(inputs_out) = n 
191
	return(inputs_out)
170 192
### Returns filtered inputs.
171 193
}
172 194

  
......
205 227
	  }
206 228
	  return(TRUE)
207 229
	}
208
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,nb_tracks, min_nuc_center, max_nuc_center) {
230
	store_cluster = function(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center) {
209 231
		if ( nb_nucs_in_cluster==nb_tracks & sum(nuc_from_track)==nb_tracks) {
210 232
			new_cluster$wp = TRUE
211 233
			center = (new_cluster$lower_bound + new_cluster$upper_bound) / 2
......
248 270
		}
249 271
		i = i+1
250 272
  }
273
  nb_tracks = length(tf_outs)
251 274
	# print(track_readers)
252 275
  new_cluster = NULL
253 276
  nb_nucs_in_cluster = 0
254 277
  nuc_from_track = c()
255
  for (i in 1:length(tf_outs)){
278
  for (i in 1:nb_tracks){
256 279
    nuc_from_track[i] = FALSE
257 280
  }
258 281
  # Start clustering
......
297 320
    } else {
298 321
			if (!is.null(new_cluster)) {
299 322
        # store old cluster
300
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
323
	      clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
301 324
			}
302 325
      # Reinit current cluster composition stuff
303 326
      nb_nucs_in_cluster = 0
304 327
      nuc_from_track = c()
305
      for (i in 1:length(tf_outs)){
328
      for (i in 1:nb_tracks){
306 329
        nuc_from_track[i] = FALSE
307 330
      }
308 331
      # create new cluster
......
331 354
  # store last cluster
332 355
  if (!is.null(new_cluster)) {
333 356
    # store old cluster
334
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
357
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster, nuc_from_track, nb_tracks, min_nuc_center, max_nuc_center)
335 358
  }
336 359
	return(list(clusters, llr_scores))
337 360
### Returns a list of clustered nucleosomes, and all computed llr scores.
......
364 387
flat_aggregated_intra_strain_nucs = function(# to flat aggregate_intra_strain_nucs function output
365 388
### This function builds a dataframe of all clusters obtained from the aggregate_intra_strain_nucs function.
366 389
partial_strain_maps, ##<< the output of aggregate_intra_strain_nucs function
367
cur_index ##<< the index of the roi involved
390
cur_index, ##<< the index of the roi involved
391
nb_tracks=3 ##<< the number of replicates
368 392
) {
369 393
	if  (length(partial_strain_maps) == 0 ){
370 394
		print(paste("Empty partial_strain_maps for roi", cur_index, "ands current strain." ))
......
386 410
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
387 411
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
388 412
			if (tmp_nuc$wp) {
389
				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
390
				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
413
        for (i in 1:(nb_tracks-1)) {
414
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = signif(tmp_nuc$nucs[[i + 1]]$llr_score,5)          
415
        }
391 416
			} else {
392
				tmp_nuc_as_list[["llr_1"]] = NA
393
				tmp_nuc_as_list[["llr_2"]] = NA
417
        for (i in 1:(nb_tracks-1)) {
418
  				tmp_nuc_as_list[[paste("llr", i, sep="_")]] = NA
419
        }
394 420
			}
395 421
      return(tmp_nuc_as_list)
396 422
    })
......
1141 1167
config=NULL, ##<< GLOBAL config variable
1142 1168
big_cur=NULL ##<< A larger region than the roi, used to filter c2c if needed.
1143 1169
) {
1144
	strain1 = roi$strain_ref
1145
	if (strain1 == strain2) {
1146
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
1170
	strain1 = roi$strain_ref  
1171
  # Default config$NEUTRAL_TRANSLATE_CUR to FALSE when it is not set
1172
  if (is.null(config$NEUTRAL_TRANSLATE_CUR)) {
1173
    config$NEUTRAL_TRANSLATE_CUR = FALSE
1174
  }
1175
	if (strain1 == strain2 | config$NEUTRAL_TRANSLATE_CUR) {
1176
    roi$strain_ref = strain2
1177
    roi$length = roi$end - roi$begin + sign(roi$end - roi$begin)
1147 1178
		return(roi)
1148 1179
	}
1149

  
1180
  
1150 1181
	# Extract c2c file
1151 1182
	if (!is.null(big_cur)) {
1152 1183
  	# Dealing with big_cur
......
1964 1995
      if (plot_wp_nucs | plot_fuzzy_nucs | plot_common_nucs ) {
1965 1996
    		replicates_wp_nucs[[replicate_rank]] = wp_nucs
1966 1997
        strain = samples[[1]]$strain
1967
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo")
1998
        wp_maps[[strain]] = flat_aggregated_intra_strain_nucs(wp_nucs, "foo", nb_tracks)
1968 1999
        fuzzy_maps[[strain]] = get_intra_strain_fuzzy(wp_maps[[strain]], as.list(samples[[1]]$roi), samples[[1]]$strain, config=config)
1969 2000

  
1970 2001
        if (plot_fuzzy_nucs) {

Formats disponibles : Unified diff