/ - Diff - NucleoMiner - Forge du Centre Blaise Pascal

Révision 21b8928f

     Python Reference
     ================
     configurator.CSV_SAMPLE_FILE = None
        Path to csv file that contains sample information.
     configurator.BOWTIE_BUILD_BIN = None
        Path for bowtie2 build bin.
     configurator.BOWTIE2_BIN = None
        Path for bowtie2 bin.
     configurator.SAMTOOLS_BIN = None
        Path for samtools bin.
     configurator.BEDTOOLS_BIN = None
        Path for bedtools bin.
     configurator.TF_BIN = None
        Path for TemplateFilter bin.
     configurator.TF_TEMPLATES_FILE = None
        Path for TemplateFilter templates file.
     configurator.ILLUMINA_OUTPUTFILE_PREFIX = None
        Prefix for Illumina fastq output files.
     configurator.INDEX_DIR = None
        Path for index dir.
     configurator.ALIGN_DIR = None
        Path for align dir.
     configurator.LOG_DIR = None
        Path for log dir
     configurator.CACHE_DIR = None
        Path for cache dir.
     configurator.RESULTS_DIR = None
        Path for results dir
     configurator.FASTA_REFERENCE_GENOME_FILES = None
        Dictionary where each fasta reference genome is indexed by
        the reference strain it corresponds to.
     configurator.AREA_BLACK_LIST = None
        Dictionary where keys are strains and values are black-listed
        genome regions.
     configurator.FASTA_INDEXES = None
        Dictionary of strain that indexes dictionaries where keys are
        chromosome reference from Fastq file and value are its
        correspondence for TemplateFilter.
     configurator.C2C_FILES = None
        Dictionary where each strain combination indexes genome alignment.
     configurator.READ_LENGTH = None
        Length of Illumina reads.
     configurator.MAPQ_THRES = None
        Alignment quality threshold.
     configurator.TF_CORR = None
        TemplateFilter Template correlation threshold.
     configurator.TF_MINW = None
        TemplateFilter minimum width of a nucleosome.
     configurator.TF_MAXW = None
        TemplateFilter maximum width of a nucleosome.
     configurator.TF_OL = None
        TemplateFilter maximum allowed overlap for two nucleosomes.
     wf.json_conf_file = 'src/nucleo_miner/nucleo_miner_config.json'
        Path to the json configuration file.
     wf.samples = []
        List of samples where a sample is identified by an id (key: *id*) and
        a strain name (key: *strain*).
     wf.samples_mnase = []
        List of Mnase samples.
     wf.strains = []
        List of reference strains.
     libcoverage.create_bowtie_index(strain, strain_fasta_ref, index_dir, bowtie_build_bin)
        Creates bowtie index for a strain *strain*.
        Parameters:
           * **strain** -- the strain reference.
           * **strain_fasta_ref** -- fasta reference genome.
           * **index_dir** -- directory in which to put the bowtie index.
           * **bowtie_build_bin** -- bowtie2 build binary.
     libcoverage.align_reads(sample, align_dir, log_dir, index_dir, illumina_outputfile_prefix, bowtie2_bin, samtools_bin, bedtools_bin)
        Aligns reads to reference genomes. It produces .sam files, that are
        converted to .bam, that are converted to .bed.
        Parameters:
           * **sample** -- a dict that describes a sample.
           * **align_dir** -- directory where aligned reads will be
             stored.
           * **log_dir** -- directory where logs will be stored.
           * **illumina_outputfile_prefix** -- prefix of Illumina
             sequencer fastq.gz output files.
           * **bowtie2_bin** -- bowtie2 binary.
           * **samtools_bin** -- samtools binary.
           * **bedtools_bin** -- bedtools binary.
           * **index_dir** -- bowtie index directory.
     libcoverage.split_fr_4_TF(sample, align_dir, fasta_indexes, area_black_list, read_length, mapq_thres)
        Create TemplateFilter input files from bed files. This function
        proceeds in two steps. First, it collects reads from bed files and
        feeds a data structure
        Parameters:
           * **sample** -- a dict that describes a sample.
           * **align_dir** -- directory where aligned reads will be
             stored.
           * **fasta_indexes** -- the chr reference from the illumina
             output file.
           * **area_black_list** -- the description of genome regions that
             will be omitted.
           * **read_length** -- Length of Illumina reads.
           * **mapq_thres** -- mapping quality criterion threshold, see
             MAPQ in BED/BAM file format.
     libcoverage.template_filter(sample, align_dir, log_dir, tf_bin, tf_templates_file, corr, minw, maxw, ol)
        Run TemplateFilter on a specific sample. It produces a .tab file.
        Parameters:
           * **sample** -- a dict that describes a sample.
           * **align_dir** -- directory where aligned reads will be
             stored.
           * **log_dir** -- directory where logs will be stored.
           * **tf_bin** -- path to the TemplateFilter binary.
           * **tf_templates_file** -- path to the TemplateFilter
             templates file.
           * **corr** -- correlation threshold, passed to
             TemplateFilter.
           * **minw** -- minimum width of a nuc, passed to
             TemplateFilter.
           * **maxw** -- maximum width of a nuc, passed to
             TemplateFilter.
           * **ol** -- maximum overlap for 2 nucs, passed to
             TemplateFilter.
     R Reference
     ===========
-...
     Usage
     ~~~~~
        aggregate_intra_strain_nucs(samples, lod_thres = -20, coord_max = 2e+07)
        aggregate_intra_strain_nucs(samples, lod_thres = 20, coord_max = 2e+07)
     Arguments
-...
     ~~~~~
        align_inter_strain_nucs(replicates, wp_nucs_strain_ref1 = NULL,
            wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = -100,
            wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = 100,
            config = NULL, ...)
-...
        #       plot_common_nucs = FALSE,
        #       height = 50)
     R: reformat an "apply manipulated" list of regions
     reformat an "apply manipulated" list of regions
     -----------------------------------------------
     Description
     ~~~~~~~~~~~
     Utils to reformat an "apply manipulated" list of regions
     Usage
     ~~~~~
        collapse_regions(regions)
     Arguments
     ~~~~~~~~~
     +-----------------+------+
     +-----------------+------+
     Author(s)
     ~~~~~~~~~
     Florent Chuffart
     R: Compute Common Uninterrupted Regions (CUR)
-...
     ~~~~~
        filter_tf_inputs(inputs, chr, x_min, x_max, nuc_width = 160,
            only_f = FALSE, only_r = FALSE)
            only_f = FALSE, only_r = FALSE, filter_for_coverage = FALSE)
     Arguments
-...
     Filter only R reads.
     "filter_for_coverage"
     Does it filter for plot coverage?
     Value
     ~~~~~
-...
     Florent Chuffart
     R: to flat aggregate_intra_strain_nucs function output
     to flat aggregate_intra_strain_nucs function output
     ---------------------------------------------------
     Description
     ~~~~~~~~~~~
     This function builds a dataframe of all clusters obtained from the
     aggregate_intra_strain_nucs function.
     Usage
     ~~~~~
        flat_aggregated_intra_strain_nucs(partial_strain_maps, roi_index)
     Arguments
     ~~~~~~~~~
     "partial_strain_maps"
     the output of aggregate_intra_strain_nucs function
     "roi_index"
     the index of the roi involved
     Value
     ~~~~~
     Returns a dataframe of all clusters obtained from the
     aggregate_intra_strain_nucs function.
     Author(s)
     ~~~~~~~~~
     Florent Chuffart
     R: flat reads
-...
     Usage
     ~~~~~
        get_all_reads(marker, combi, form = "wp")
        get_all_reads(marker, combi, form = "wp", config = NULL)
     Arguments
-...
     The nuc form to consider.
     "config"
     GLOBAL config variable
     Author(s)
     ~~~~~~~~~
-...
     Usage
     ~~~~~
        get_sneps(marker, combi, form, all_samples)
        get_sneps(marker, combi, form, all_samples, config = NULL)
     Arguments
-...
     Global list of samples.
     "config"
     GLOBAL config variable
     Author(s)
     ~~~~~~~~~
-...
     +-----------------+-----------------------------------------------------+
     | Author:         | Florent Chuffart                                    |
     +-----------------+-----------------------------------------------------+
     | Version:        | 2.3.3                                               |
     | Version:        | 2.3.28                                              |
     +-----------------+-----------------------------------------------------+
     | License:        | CeCILL                                              |
     +-----------------+-----------------------------------------------------+
-...
            plot_arrow_raw_reads = TRUE, plot_arrow_nuc_reads = TRUE,
            plot_squared_reads = TRUE, plot_coverage = FALSE, plot_gaussian_reads = TRUE,
            plot_gaussian_unified_reads = TRUE, plot_ellipse_nucs = TRUE,
            plot_wp_nucs = TRUE, plot_wp_nuc_model = TRUE, plot_common_nucs = TRUE,
            plot_anovas = FALSE, plot_anova_boxes = FALSE, plot_wp_nucs_4_nonmnase = FALSE,
            aggregated_intra_strain_nucs = NULL, aligned_inter_strain_nucs = NULL,
            height = 10, config = NULL)
            change_col = TRUE, plot_wp_nucs = TRUE, plot_wp_nuc_model = TRUE,
            plot_common_nucs = TRUE, plot_anovas = FALSE, plot_anova_boxes = FALSE,
            plot_wp_nucs_4_nonmnase = FALSE, plot_chain = FALSE, aggregated_intra_strain_nucs = NULL,
            aligned_inter_strain_nucs = NULL, height = 10, config = NULL)
     Arguments
-...
     Plot (or not) ellipse for a nuc.
     "change_col"
     Change the color of each nucleosome.
     "plot_wp_nucs"
     Plot (or not) cluster of nucs
-...
     Plot (or not) clusters for non inputs samples.
     "plot_chain"
     Plot (or not) clusterised nucleosomes between mnase samples.
     "aggregated_intra_strain_nucs"
     list of aggregated intra strain nucs. If NULL, it will be computed.

     the 53 samples is identified by a unique identifier. The file
     *CSV_SAMPLE_FILE* sums up this information.
     configurator.CSV_SAMPLE_FILE = None
        Path to csv file that contains sample information.
     We use a convention to link sample and Illumina fastq outputs.
     Illumina output files of the sample *ID* will be stored in the
     directory *ILLUMINA_OUTPUTFILE_PREFIX* + *ID*. For example, sample 41
     outputs will be stored in the directory
     *data/2012-09-05/FASTQ/Sample_Yvert_Bq41/*.
     configurator.ILLUMINA_OUTPUTFILE_PREFIX = None
        Prefix for Illumina fastq output files.
     For BY (resp. RM and YJM) we use following reference genome
     *saccharomyces_cerevisiae_BY_S288c_chromosomes.fasta* (resp.
     *saccharomyces_cerevisiae_rm11-1a_1_supercontigs.fasta* and
     *saccharomyces_cerevisiae_YJM_789_screencontig.fasta*). The index
     *FASTA_REFERENCE_GENOME_FILES* stores this information.
     configurator.FASTA_REFERENCE_GENOME_FILES = None
        Dictionary where each fasta reference genome is indexed by
        the reference strain it corresponds to.
     Each chromosome/contig is identified in the fasta file by an obscure
     identifier. For example, BY chromosome I is identified by
     *gi|144228165|ref|NC_001133.7|* whereas TemplateFilter expects an
     integer. So, we translate it. The index *FASTA_INDEXES* stores this
     translation.
     configurator.FASTA_INDEXES = None
        Dictionary of strain that indexes dictionaries where keys are
        chromosome reference from Fastq file and value are its
        correspondence for TemplateFilter.
     From a pragmatic point of view we discard some parts of the genome
     (repeated sequences, etc.). The list of black-listed areas is
     explicitly detailed in *AREA_BLACK_LIST*.
     configurator.AREA_BLACK_LIST = None
        Dictionary where keys are strains and values are black-listed
        genome regions.
     For BY-RM (resp. BY-YJM and RM-YJM) genome sequence alignment we use
     the previously computed .c2c file
     *data/2012-03_primarydata/BY_RM_gxcomp.c2c* (resp.
-...
     *NucleoMiner*, the old version of *NucleoMiner2* (http://www.ens-
     lyon.fr/LBMC/gisv/NucleoMiner_Manual/manual.pdf).
     configurator.C2C_FILES = None
        Dictionary where each strain combination indexes genome alignment.
     *nucleominer* uses specific directories to work in; these are described
     in *INDEX_DIR*, *ALIGN_DIR* and *LOG_DIR*.
-...
     All paths, prefixes and indexes can be changed in the
     *src/current/nucleominer_config.json* file.
     wf.json_conf_file = 'src/nucleo_miner/nucleo_miner_config.json'
        Path to the json configuration file.
     Preprocessing Illumina Fastq Reads for Each Sample
     ==================================================
-...
     *samples* *samples_mnase* and *strains* that will be used along the 4
     steps.
     wf.samples = []
        List of samples where a sample is identified by an id (key: *id*) and
        a strain name (key: *strain*).
     wf.samples_mnase = []
        List of Mnase samples.
     wf.strains = []
        List of reference strains.
     Creating Bowtie Index from each Reference Genome
     ------------------------------------------------
-...
     will be used by bowtie to align reads. This step is performed by the
     following part of the *wf.py* script:
          for strain in strains:
            per_strain_stats[strain] = create_bowtie_index(strain,
              config["FASTA_REFERENCE_GENOME_FILES"][strain], config["INDEX_DIR"],
              config["BOWTIE_BUILD_BIN"])
     The following table sums up the involved file sizes and process
     durations concerning this step.
-...
     *subprocess* class. This step is performed by the following part of
     the *wf.py* script:
          for sample in samples:
            per_sample_align_stats["sample_%s" % sample["id"]] = align_reads(sample,
              config["ALIGN_DIR"], config["LOG_DIR"], config["INDEX_DIR"],
              config["ILLUMINA_OUTPUTFILE_PREFIX"], config["BOWTIE2_BIN"],
              config["SAMTOOLS_BIN"], config["BEDTOOLS_BIN"])
     Convert Aligned Reads for TemplateFilter
     ----------------------------------------
-...
     This step is performed by the following part of the *wf.py* script:
          for sample in samples:
            per_sample_convert_stats["sample_%s" % sample["id"]] = split_fr_4_TF(sample,
              config["ALIGN_DIR"], config["FASTA_INDEXES"], config["AREA_BLACK_LIST"],
              config["READ_LENGTH"],config["MAPQ_THRES"])
     The following table sums up the number of reads, involved file sizes and
     process durations concerning the last two steps. In our case, the
     alignment process has been multithreaded over 3 cores.
-...
     This step is performed by the following part of the *wf.py* script:
          for sample in samples_mnase:
            per_mnase_sample_stats["sample_%s" % sample["id"]] = template_filter(sample,
              config["ALIGN_DIR"], config["LOG_DIR"], config["TF_BIN"],
              config["TF_TEMPLATES_FILE"], config["TF_CORR"], config["TF_MINW"],
              config["TF_MAXW"], config["TF_OL"])
     +----+--------+------------+---------------+------------------+
     | id | strain | found nucs | nuc file size | process duration |
     +====+========+============+===============+==================+

     # built documents.
+    #
     # The short X.Y version.
     version = '2.3.27'
     version = '2.3.28'
     # The full version, including alpha/beta/rc tags.
     release = '2.3.27'
     release = '2.3.28'
     # The language for content autogenerated by Sphinx. Refer to documentation
     # for a list of supported languages.

     +---------------+---------------------------------------------------+
     | Author:       | Florent Chuffart                                  |
     +---------------+---------------------------------------------------+
     | Version:      | 2.3.27                                            |
     | Version:      | 2.3.28                                            |
     +---------------+---------------------------------------------------+
     | License:      | CeCILL                                            |
     +---------------+---------------------------------------------------+

     Package: nucleominer
     Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr>
     Author: Florent Chuffart
     Version: 2.3.27
     Version: 2.3.28
     License: CeCILL
     Title: nm
     Depends: seqinr, plotrix, DESeq, cachecache

b/src/NAMESPACE

export(FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB)

export(flat_aggregated_intra_strain_nucs, FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB)

b/src/R/nucleominer.R
479	479	})
480	480	tmp_strain_maps = do.call("rbind", tmp_strain_map)
481	481	}
482		return(data.frame(tmp_strain_maps))
	482	return(data.frame(lapply(data.frame(tmp_strain_maps, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE))
483	483	### Returns a dataframe of all clusters obtain from aggregate_intra_strain_nucs function.
484	484	}
485	485
...	...
801	801	}
802	802	})
803	803	}
804		non_inter_fuzzy = rec_substract_region(region1, region2)
	804	non_inter_fuzzy = rec_substract_region(region1[,1:4], region2[,1:4])
805	805	if (is.null(non_inter_fuzzy)) {return(non_inter_fuzzy)}
806	806	tmp_ulist = unlist(non_inter_fuzzy)
807	807	tmp_names = names(tmp_ulist)[1:4]

Formats disponibles : Unified diff

LBMC » NucleoMiner

Révision 21b8928f