Révision 21b8928f

b/doc/sphinx_doc/build/text/ref.txt
6 6
Python Reference
7 7
================
8 8

  
9
configurator.CSV_SAMPLE_FILE = None
10

  
11
   Path to the CSV file that contains sample information.
12

  
13
configurator.BOWTIE_BUILD_BIN = None
14

  
15
   Path for bowtie2 build bin.
16

  
17
configurator.BOWTIE2_BIN = None
18

  
19
   Path for bowtie2 bin.
20

  
21
configurator.SAMTOOLS_BIN = None
22

  
23
   Path for samtools bin.
24

  
25
configurator.BEDTOOLS_BIN = None
26

  
27
   Path for bedtools bin.
28

  
29
configurator.TF_BIN = None
30

  
31
   Path for TemplateFilter bin.
32

  
33
configurator.TF_TEMPLATES_FILE = None
34

  
35
   Path for TemplateFilter templates file.
36

  
37
configurator.ILLUMINA_OUTPUTFILE_PREFIX = None
38

  
39
   Prefix for Illumina fastq output files.
40

  
41
configurator.INDEX_DIR = None
42

  
43
   Path for index dir.
44

  
45
configurator.ALIGN_DIR = None
46

  
47
   Path for align dir.
48

  
49
configurator.LOG_DIR = None
50

  
51
   Path for log dir
52

  
53
configurator.CACHE_DIR = None
54

  
55
   Path for cache dir.
56

  
57
configurator.RESULTS_DIR = None
58

  
59
   Path for results dir
60

  
61
configurator.FASTA_REFERENCE_GENOME_FILES = None
62

  
63
   Dictionary where each fasta reference genome is indexed by
64
   the reference strain it corresponds to.
65

  
66
configurator.AREA_BLACK_LIST = None
67

  
68
   Dictionary where keys are strains and values are black-listed
69
   genome regions.
70

  
71
configurator.FASTA_INDEXES = None
72

  
73
   Dictionary indexed by strain; each value is a dictionary whose keys
74
   are chromosome references from the Fastq file and whose values are
75
   their correspondence for TemplateFilter.
76

  
77
configurator.C2C_FILES = None
78

  
79
   Dictionary where each strain combination indexes a genome alignment.
80

  
81
configurator.READ_LENGTH = None
82

  
83
   Length of Illumina reads.
84

  
85
configurator.MAPQ_THRES = None
86

  
87
   Alignment quality threshold.
88

  
89
configurator.TF_CORR = None
90

  
91
   TemplateFilter Template correlation threshold.
92

  
93
configurator.TF_MINW = None
94

  
95
   TemplateFilter minimum width of a nucleosome.
96

  
97
configurator.TF_MAXW = None
98

  
99
   TemplateFilter maximum  width of a nucleosome.
100

  
101
configurator.TF_OL = None
102

  
103
   TemplateFilter maximum allowed overlap for two nucleosomes.
104

  
105
wf.json_conf_file = 'src/nucleo_miner/nucleo_miner_config.json'
106

  
107
   Path to the json configuration file.
108

  
109
wf.samples = []
110

  
111
   List of samples where a sample is identified by an id (key: *id*) and
112
   a strain name (key: *strain*).
113

  
114
wf.samples_mnase = []
115

  
116
   List of Mnase samples.
117

  
118
wf.strains = []
119

  
120
   List of reference strains.
121

  
122
libcoverage.create_bowtie_index(strain, strain_fasta_ref, index_dir, bowtie_build_bin)
123

  
124
   Creates bowtie index for a strain *strain*.
125

  
126
   Parameters:
127
      * **strain** -- the strain reference.
128

  
129
      * **strain_fasta_ref** -- fasta reference genome.
130

  
131
      * **index_dir** -- directories where to put bowtie index.
132

  
133
      * **bowtie_build_bin** -- bowtie2 build binary.
134

  
135
libcoverage.align_reads(sample, align_dir, log_dir, index_dir, illumina_outputfile_prefix, bowtie2_bin, samtools_bin, bedtools_bin)
136

  
137
   Aligns reads to reference genomes. It produces .sam files, that are
138
   converted to .bam, that are converted to .bed.
139

  
140
   Parameters:
141
      * **sample** -- a dict that describes a sample.
142

  
143
      * **align_dir** -- directory where aligned reads will be
144
        stored.
145

  
146
      * **log_dir** -- directory where logs will be stored.
147

  
148
      * **illumina_outputfile_prefix** -- prefix of Illumina
149
        sequencer fastq.gz output files.
150

  
151
      * **bowtie2_bin** -- bowtie2 binary.
152

  
153
      * **samtools_bin** -- samtools binary.
154

  
155
      * **bedtools_bin** -- bedtools binary.
156

  
157
      * **index_dir** -- bowtie index directory.
158

  
159
libcoverage.split_fr_4_TF(sample, align_dir, fasta_indexes, area_black_list, read_length, mapq_thres)
160

  
161
   Create TemplateFilter input files from bed files. This function
162
   proceeds in two steps. First, it collects reads from bed files and
163
   feeds a data structure.
164

  
165
   Parameters:
166
      * **sample** -- a dict that describes a sample.
167

  
168
      * **align_dir** -- directory where aligned reads will be
169
        stored.
170

  
171
      * **fasta_index** -- the chr reference from the illumina
172
        output file.
173

  
174
      * **area_black_list** -- the description of genome regions that
175
        will be omitted.
176

  
177
      * **read_length** -- Length of Illumina reads.
178

  
179
      * **mapq_thres** -- mapping quality criterion threshold, see
180
        MAPQ in BED/BAM file format.
181

  
182
libcoverage.template_filter(sample, align_dir, log_dir, tf_bin, tf_templates_file, corr, minw, maxw, ol)
183

  
184
   Run TemplateFilter on a specific sample. It produces a .tab file.
185

  
186
   Parameters:
187
      * **sample** -- a dict that describes a sample.
188

  
189
      * **align_dir** -- directory where aligned reads will be
190
        stored.
191

  
192
      * **log_dir** -- directory where logs will be stored.
193

  
194
      * **tf_bin** -- path to the TemplateFilter binary.
195

  
196
      * **tf_templates_file** -- path to the TemplateFilter
197
        templates file.
198

  
199
      * **corr** -- correlation threshold transmits to
200
        TemplateFilter.
201

  
202
      * **minw** -- minimum width of a nuc, transmits to
203
        TemplateFilter.
204

  
205
      * **maxw** -- maximum width of a nuc, transmits to
206
        TemplateFilter.
207

  
208
      * **ol** -- maximum overlaps for 2 nuc, transmits to
209
        TemplateFilter.
210

  
211 9

  
212 10
R Reference
213 11
===========
......
333 131
Usage
334 132
~~~~~
335 133

  
336
   aggregate_intra_strain_nucs(samples, lod_thres = -20, coord_max = 2e+07)
134
   aggregate_intra_strain_nucs(samples, lod_thres = 20, coord_max = 2e+07)
337 135

  
338 136

  
339 137
Arguments
......
412 210
~~~~~
413 211

  
414 212
   align_inter_strain_nucs(replicates, wp_nucs_strain_ref1 = NULL,
415
       wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = -100,
213
       wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = 100,
416 214
       config = NULL, ...)
417 215

  
418 216

  
......
632 430
   #       plot_common_nucs = FALSE,
633 431
   #       height = 50)
634 432

  
433
R: reformat an "apply manipulated" list of regions
434

  
435

  
436
reformat an "apply manipulated" list of regions
437
-----------------------------------------------
438

  
439

  
440
Description
441
~~~~~~~~~~~
442

  
443
Utils to reformat an "apply manipulated" list of regions
444

  
445

  
446
Usage
447
~~~~~
448

  
449
   collapse_regions(regions)
450

  
451

  
452
Arguments
453
~~~~~~~~~
454

  
455
+-----------------+------+
456
+-----------------+------+
457

  
458

  
459
Author(s)
460
~~~~~~~~~
461

  
462
Florent Chuffart
463

  
635 464
R: Compute Common Uninterrupted Regions (CUR)
636 465

  
637 466

  
......
904 733
~~~~~
905 734

  
906 735
   filter_tf_inputs(inputs, chr, x_min, x_max, nuc_width = 160,
907
       only_f = FALSE, only_r = FALSE)
736
       only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE)
908 737

  
909 738

  
910 739
Arguments
......
938 767

  
939 768
Filter only R reads.
940 769

  
770
"filter_for_coverage"
771

  
772
Does it filter for plot coverage?
773

  
941 774

  
942 775
Value
943 776
~~~~~
......
1015 848

  
1016 849
Florent Chuffart
1017 850

  
851
R: to flat aggregate_intra_strain_nucs function output
852

  
853

  
854
to flat aggregate_intra_strain_nucs function output
855
---------------------------------------------------
856

  
857

  
858
Description
859
~~~~~~~~~~~
860

  
861
This function builds a dataframe of all clusters obtained from
862
aggregate_intra_strain_nucs function.
863

  
864

  
865
Usage
866
~~~~~
867

  
868
   flat_aggregated_intra_strain_nucs(partial_strain_maps, roi_index)
869

  
870

  
871
Arguments
872
~~~~~~~~~
873

  
874
"partial_strain_maps"
875

  
876
the output of aggregate_intra_strain_nucs function
877

  
878
"roi_index"
879

  
880
the index of the roi involved
881

  
882

  
883
Value
884
~~~~~
885

  
886
Returns a dataframe of all clusters obtained from
887
aggregate_intra_strain_nucs function.
888

  
889

  
890
Author(s)
891
~~~~~~~~~
892

  
893
Florent Chuffart
894

  
1018 895
R: flat reads
1019 896

  
1020 897

  
......
1073 950
Usage
1074 951
~~~~~
1075 952

  
1076
   get_all_reads(marker, combi, form = "wp")
953
   get_all_reads(marker, combi, form = "wp", config = NULL)
1077 954

  
1078 955

  
1079 956
Arguments
......
1091 968

  
1092 969
The nuc form to consider.
1093 970

  
971
"config"
972

  
973
GLOBAL config variable
974

  
1094 975

  
1095 976
Author(s)
1096 977
~~~~~~~~~
......
1247 1128
Usage
1248 1129
~~~~~
1249 1130

  
1250
   get_sneps(marker, combi, form, all_samples)
1131
   get_sneps(marker, combi, form, all_samples, config = NULL)
1251 1132

  
1252 1133

  
1253 1134
Arguments
......
1269 1150

  
1270 1151
Global list of samples.
1271 1152

  
1153
"config"
1154

  
1155
GLOBAL config variable
1156

  
1272 1157

  
1273 1158
Author(s)
1274 1159
~~~~~~~~~
......
1369 1254
+-----------------+-----------------------------------------------------+
1370 1255
| Author:         | Florent Chuffart                                    |
1371 1256
+-----------------+-----------------------------------------------------+
1372
| Version:        | 2.3.3                                               |
1257
| Version:        | 2.3.28                                              |
1373 1258
+-----------------+-----------------------------------------------------+
1374 1259
| License:        | CeCILL                                              |
1375 1260
+-----------------+-----------------------------------------------------+
......
1823 1708
       plot_arrow_raw_reads = TRUE, plot_arrow_nuc_reads = TRUE,
1824 1709
       plot_squared_reads = TRUE, plot_coverage = FALSE, plot_gaussian_reads = TRUE,
1825 1710
       plot_gaussian_unified_reads = TRUE, plot_ellipse_nucs = TRUE,
1826
       plot_wp_nucs = TRUE, plot_wp_nuc_model = TRUE, plot_common_nucs = TRUE,
1827
       plot_anovas = FALSE, plot_anova_boxes = FALSE, plot_wp_nucs_4_nonmnase = FALSE,
1828
       aggregated_intra_strain_nucs = NULL, aligned_inter_strain_nucs = NULL,
1829
       height = 10, config = NULL)
1711
       change_col = TRUE, plot_wp_nucs = TRUE, plot_wp_nuc_model = TRUE,
1712
       plot_common_nucs = TRUE, plot_anovas = FALSE, plot_anova_boxes = FALSE,
1713
       plot_wp_nucs_4_nonmnase = FALSE, plot_chain = FALSE, aggregated_intra_strain_nucs = NULL,
1714
       aligned_inter_strain_nucs = NULL, height = 10, config = NULL)
1830 1715

  
1831 1716

  
1832 1717
Arguments
......
1872 1757

  
1873 1758
Plot (or not) ellipse for a nuc.
1874 1759

  
1760
"change_col"
1761

  
1762
Change the color of each nucleosome.
1763

  
1875 1764
"plot_wp_nucs"
1876 1765

  
1877 1766
Plot (or not) cluster of nucs
......
1896 1785

  
1897 1786
Plot (or not) clusters for non inputs samples.
1898 1787

  
1788
"plot_chain"
1789

  
1790
Plot (or not) clustered nucleosomes between Mnase samples.
1791

  
1899 1792
"aggregated_intra_strain_nucs"
1900 1793

  
1901 1794
list of aggregated intra strain nucs. If NULL, it will be computed.
b/doc/sphinx_doc/build/text/tuto.txt
44 44
the 53 samples is identified by a unique identifier. The file
45 45
*CSV_SAMPLE_FILE* sums up this information.
46 46

  
47
configurator.CSV_SAMPLE_FILE = None
48

  
49
   Path to cvs file that contains sample information.
50

  
51 47
We use a convention to link sample and Illumina fastq outputs.
52 48
Illumina output files of the sample *ID* will be stored in the
53 49
directory *ILLUMINA_OUTPUTFILE_PREFIX* + *ID*. For example, sample 41
54 50
outputs will be stored in the directory
55 51
*data/2012-09-05/FASTQ/Sample_Yvert_Bq41/*.
56 52

  
57
configurator.ILLUMINA_OUTPUTFILE_PREFIX = None
58

  
59
   Prefix for Illumina fastq output files.
60

  
61 53
For BY (resp. RM and YJM) we use following reference genome
62 54
*saccharomyces_cerevisiae_BY_S288c_chromosomes.fasta* (resp.
63 55
*saccharomyces_cerevisiae_rm11-1a_1_supercontigs.fasta* and
64 56
*saccharomyces_cerevisiae_YJM_789_screencontig.fasta*). The index
65 57
*FASTA_REFERENCE_GENOME_FILES* stores this information.
66 58

  
67
configurator.FASTA_REFERENCE_GENOME_FILES = None
68

  
69
   Dictionary where each fasta reference genomes is indexed by
70
   reference strain that it corresponds.
71

  
72 59
Each chromosome/contig is identified in the fasta file by an obscure
73 60
identifier. For example, BY chromosome I is identified by
74 61
*gi|144228165|ref|NC_001133.7|* whereas TemplateFilter expects an
75 62
integer. So, we translate it. The index *FASTA_INDEXES* stores this
76 63
translation.
77 64

  
78
configurator.FASTA_INDEXES = None
79

  
80
   Dictionary of strain that indexes dictionaries where keys are
81
   chromosome reference from Fastq file and value are its
82
   correspondance for Templatefilter.
83

  
84 65
From a pragmatic point of view we discard some parts of the genome
85 66
(repeated sequences, etc.). The list of black-listed areas is
86 67
explicitly detailed in *AREA_BLACK_LIST*.
87 68

  
88
configurator.AREA_BLACK_LIST = None
89

  
90
   Dictionary where keys are strain and values are black listed of
91
   geneome region.
92

  
93 69
For BY-RM (resp. BY-YJM and RM-YJM) genome sequence alignment we use
94 70
a previously computed .c2c file
95 71
*data/2012-03_primarydata/BY_RM_gxcomp.c2c* (resp.
......
98 74
*NucleoMiner*, the old version of *NucleoMiner2* (http://www.ens-
99 75
lyon.fr/LBMC/gisv/NucleoMiner_Manual/manual.pdf).
100 76

  
101
configurator.C2C_FILES = None
102

  
103
   Dictionary where each strain combination indexes genome aligment.
104

  
105 77
*nucleominer* uses specific directory to work in, these are described
106 78
in *INDEX_DIR*, *ALIGN_DIR* and *LOG_DIR*.
107 79

  
......
112 84
All paths, prefixes and indexes can be changed in the
113 85
*src/current/nucleominer_config.json* file.
114 86

  
115
wf.json_conf_file = 'src/nucleo_miner/nucleo_miner_config.json'
116

  
117
   Path to the json configuration file.
118

  
119 87

  
120 88
Preprocessing Illumina Fastq Reads for Each Sample
121 89
==================================================
......
125 93
*samples* *samples_mnase* and *strains* that will be used along the 4
126 94
steps.
127 95

  
128
wf.samples = []
129

  
130
   List of samples where a sample is identify by an id (key: *id*) and
131
   a strain name (key *strain*).
132

  
133
wf.samples_mnase = []
134

  
135
   List of Mnase samples.
136

  
137
wf.strains = []
138

  
139
   List of reference strains.
140

  
141 96

  
142 97
Creating Bowtie Index from each Reference Genome
143 98
------------------------------------------------
......
147 102
will be used by bowtie to align reads. This step is performed by the
148 103
following part of the *wf.py* script:
149 104

  
150
     for strain in strains:
151
       per_strain_stats[strain] = create_bowtie_index(strain, 
152
         config["FASTA_REFERENCE_GENOME_FILES"][strain], config["INDEX_DIR"], 
153
         config["BOWTIE_BUILD_BIN"])
154

  
155 105
The following table sums up involved file sizes and process durations
156 106
concerning this step.
157 107

  
......
175 125
*subprocess* class. This step is performed by the following part of
176 126
the *wf.py* script:
177 127

  
178
     for sample in samples:
179
       per_sample_align_stats["sample_%s" % sample["id"]] = align_reads(sample, 
180
         config["ALIGN_DIR"], config["LOG_DIR"], config["INDEX_DIR"], 
181
         config["ILLUMINA_OUTPUTFILE_PREFIX"], config["BOWTIE2_BIN"], 
182
         config["SAMTOOLS_BIN"], config["BEDTOOLS_BIN"])
183

  
184 128

  
185 129
Convert Aligned Reads for TemplateFilter
186 130
----------------------------------------
......
206 150

  
207 151
This step is performed by the following part of the *wf.py* script:
208 152

  
209
     for sample in samples:
210
       per_sample_convert_stats["sample_%s" % sample["id"]] = split_fr_4_TF(sample, 
211
         config["ALIGN_DIR"], config["FASTA_INDEXES"], config["AREA_BLACK_LIST"], 
212
         config["READ_LENGTH"],config["MAPQ_THRES"])
213

  
214 153
The following table sums up the number of reads, involved file sizes and
215 154
process durations concerning the last two steps. In our case, the alignment
216 155
process has been multithreaded over 3 cores.
......
346 285

  
347 286
This step is performed by the following part of the *wf.py* script:
348 287

  
349
     for sample in samples_mnase:
350
       per_mnase_sample_stats["sample_%s" % sample["id"]] = template_filter(sample, 
351
         config["ALIGN_DIR"], config["LOG_DIR"], config["TF_BIN"], 
352
         config["TF_TEMPLATES_FILE"], config["TF_CORR"], config["TF_MINW"], 
353
         config["TF_MAXW"], config["TF_OL"])  
354

  
355 288
+----+--------+------------+---------------+------------------+
356 289
| id | strain | found nucs | nuc file size | process duration |
357 290
+====+========+============+===============+==================+
b/doc/sphinx_doc/conf.py
50 50
# built documents.
51 51
#
52 52
# The short X.Y version.
53
version = '2.3.27'
53
version = '2.3.28'
54 54
# The full version, including alpha/beta/rc tags.
55
release = '2.3.27'
55
release = '2.3.28'
56 56

  
57 57
# The language for content autogenerated by Sphinx. Refer to documentation
58 58
# for a list of supported languages.
b/doc/sphinx_doc/rref.rst
1166 1166
+---------------+---------------------------------------------------+
1167 1167
| Author:       | Florent Chuffart                                  |
1168 1168
+---------------+---------------------------------------------------+
1169
| Version:      | 2.3.27                                            |
1169
| Version:      | 2.3.28                                            |
1170 1170
+---------------+---------------------------------------------------+
1171 1171
| License:      | CeCILL                                            |
1172 1172
+---------------+---------------------------------------------------+
b/src/DESCRIPTION
1 1
Package: nucleominer
2 2
Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr>
3 3
Author: Florent Chuffart
4
Version: 2.3.27
4
Version: 2.3.28
5 5
License: CeCILL 
6 6
Title: nm
7 7
Depends: seqinr, plotrix, DESeq, cachecache
b/src/NAMESPACE
1
export(FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB)
1
export(flat_aggregated_intra_strain_nucs, FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB)
b/src/R/nucleominer.R
479 479
    })
480 480
    tmp_strain_maps = do.call("rbind", tmp_strain_map)
481 481
	}
482
  return(data.frame(tmp_strain_maps))
482
  return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
483 483
### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
484 484
}
485 485

  
......
801 801
    }
802 802
  })
803 803
  }
804
  non_inter_fuzzy = rec_substract_region(region1, region2)
804
  non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
805 805
  if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
806 806
  tmp_ulist = unlist(non_inter_fuzzy)
807 807
  tmp_names = names(tmp_ulist)[1:4]

Formats disponibles : Unified diff