Révision e5603c3f src/R/nucleominer.R

b/src/R/nucleominer.R
136 136
  return(ratio)
137 137
### Returns the log likelihood ratio.
138 138
}, ex=function(){
139
  # LOD score for 2 set of values
139
  # LLR score for 2 set of values
140 140
  mean1=5; sd1=2; card2 = 250
141 141
  mean2=6; sd2=3; card1 = 200
142 142
  x1 = rnorm(card1, mean1, sd1)
......
273 273

  
274 274

  
275 275
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
276
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
276
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for “well positioned”. This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
277 277
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
278
llr_thres=20, ##<< Log likelihood ration threshold.
278
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
279 279
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
280 280
){
281 281
	end_of_tracks = function(tracks) {
......
487 487
}
488 488

  
489 489
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains.
490
### This function aligns nucs between two strains for a given genome region.
490
### This function aligns nucleosomes between two strains for a given genome region.
491 491
replicates, ##<< Set of replicates, ideally 3 per strain.
492
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
493
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
492
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
493
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
494 494
corr_thres=0.5, ##<< Correlation threshold.
495
llr_thres=100, ##<< LOD cut off.
495
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
496 496
config=NULL, ##<< GLOBAL config variable
497 497
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
498 498
) {
......
575 575
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
576 576
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
577 577
								llr_scores = c(llr_scores, llr_score)
578
								# Filtering on LOD Score
578
								# Filtering on LLR Score
579 579
                if (llr_score < llr_thres) {
580 580
									tmp_nuc = list()
581 581
									# strain_ref1
......
809 809
  return(non_inter_fuzzy)
810 810
}
811 811

  
812
union_regions = function(# Aggregate regions that intersect themnselves.
812
union_regions = function(# Aggregate regions that intersect themselves.
813 813
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable.
814 814
regions ##<< The Regions to be aggregated
815 815
) {
......
862 862
# }
863 863

  
864 864
translate_regions = function(# Translate a list of regions from a strain ref to another.
865
### This function is an eloborated call to translate_cur.
865
### This function is an elaborated call to translate_cur.
866 866
regions, ##<< Regions to be translated.
867 867
combi, ##<< Combination of strains.
868 868
cur_index, ##<< The region of interest index.
......
964 964
  return(all_reads)
965 965
}
966 966

  
967
get_design = function(# Build the design for deseq
967
get_design = function(# Build the design for DESeq
968 968
### This function build the design according sample properties.
969 969
marker, ##<< The marker to considere.
970 970
combi, ##<< The starin combination to considere.
......
1006 1006
}
1007 1007

  
1008 1008
plot_dist_samples = function(# Plot the distribution of reads.
1009
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
1009
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
1010 1010
strain, ##<< The strain to considere.
1011 1011
marker, ##<< The marker to considere.
1012 1012
res, ##<< Data
......
1035 1035
  legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols)
1036 1036
}
1037 1037

  
1038
analyse_design = function(# Launch deseq methods.
1039
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
1040
snep_design, ##<< The design to considere.
1041
reads ##<< The data to considere.
1038
analyse_design = function(# Launch DESeq methods.
1039
### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
1040
snep_design, ##<< The design to consider.
1041
reads ##<< The data to consider.
1042 1042
) {
1043 1043
	snep_count_table = reads[, rownames(snep_design)]
1044 1044
	cdsFull = newCountDataSet(snep_count_table, snep_design)
......
1065 1065
combi, ##<< The strain combination involved.
1066 1066
form, ##<< the nuc form involved.
1067 1067
all_samples, ##<< Global list of samples.
1068
FDR = 0.0001, ## the specific False Discover Rate
1068 1069
config=NULL ##<< GLOBAL config variable
1069 1070
) {
1070 1071
  # PRETREAT
......
1082 1083
	reads$pvalsGLM = signif(tmp_analyse[[4]], 5)
1083 1084
	snep_design = tmp_analyse[[3]]
1084 1085
  # print(snep_design)
1085
	fdr = 0.0001
1086
	thres = FDR(reads$pvalsGLM, fdr)
1086
	thres = FDR(reads$pvalsGLM, FDR)
1087 1087
	reads$snep_index = reads$pvalsGLM < thres
1088 1088
	print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = ""))
1089 1089
  return(reads)
......
1119 1119

  
1120 1120

  
1121 1121
ROM2ARAB = function(# Roman to Arabic pair list.
1122
### Util to convert Roman to Arabic
1122
### Utility to convert Roman numbers into Arabic numbers
1123 1123
){list(
1124 1124
  "I" = 1,
1125 1125
  "II" = 2,
......
1159 1159
})
1160 1160

  
1161 1161
ARAB2ROM = function(# Arabic to Roman pair list.
1162
### Util to convert Arabicto Roman
1162
### Utility to convert Arabic numbers to Roman numbers
1163 1163
){switch_pairlist(ROM2ARAB())}
1164 1164

  
1165 1165

  
1166 1166

  
1167 1167

  
1168 1168
c2c_extraction = function(# Extract a sub part of the corresponding c2c file
1169
### This fonction allow to acces to a specific part of the c2c file.
1169
### This fonction allows to access to a specific part of the c2c file.
1170 1170
strain1, ##<< the key strain
1171 1171
strain2, ##<< the target strain
1172
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value
1173
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound
1174
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound
1172
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
1173
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
1174
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
1175 1175
config=NULL##<<  GLOBAL config variable
1176 1176
) {
1177 1177
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
......
1656 1656
}
1657 1657

  
1658 1658
build_replicates = structure(function(# Stage replicates data
1659
### This function loads in memory data corresponding to the given experiments.
1660
expe, ##<< a list of vector corresponding to vector of replicates.
1659
### This function loads in memory the data corresponding to the given experiments.
1660
expe, ##<< a list of vectors corresponding to replicates.
1661 1661
roi, ##<< the region that we are interested in.
1662 1662
only_fetch=FALSE, ##<< filter or not inputs.
1663 1663
get_genome=FALSE,##<< Load or not corresponding genome.
......
1705 1705
    # library(nucleominer)
1706 1706
    #
1707 1707
    # # Read config file
1708
    # json_conf_file = "nucleo_miner_config.json"
1708
    # json_conf_file = "nucleominer_config.json"
1709 1709
    # config = fromJSON(paste(readLines(json_conf_file), collapse=""))
1710 1710
    # # Read sample file
1711 1711
    # all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE)
......
2017 2017
        tmp_track_prev = tmp_track[-length(tmp_track)]
2018 2018
        tmp_track_next = tmp_track[-1]
2019 2019
        # tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25
2020
        if (is.null(config$TRACK_LOD_OFFSET)) {
2021
          config$TRACK_LOD_OFFSET = 0
2020
        if (is.null(config$TRACK_LLR_OFFSET)) {
2021
          config$TRACK_LLR_OFFSET = 0
2022 2022
        }
2023
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
2023
        tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
2024 2024
        tmp_x_prev = tmp_x[-length(tmp_x)]
2025 2025
        tmp_x_next = tmp_x[-1]
2026 2026
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
......
2038 2038
        points(tmp_x, tmp_y, cex=4, pch=16, col="white")
2039 2039
        points(tmp_x, tmp_y, cex=4, lwd=2)
2040 2040
        text(tmp_x, tmp_y, 1:nrow(tf_nucs))
2041
        if (is.null(config$LEGEND_LOD_POS)) {
2041
        if (is.null(config$LEGEND_LLR_POS)) {
2042 2042
          pos = 2
2043 2043
        } else {
2044
          pos = config$LEGEND_LOD_POS
2044
          pos = config$LEGEND_LLR_POS
2045 2045
        }
2046 2046
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2047 2047
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 

Formats disponibles : Unified diff