Révision e5603c3f src/R/nucleominer.R
b/src/R/nucleominer.R | ||
---|---|---|
136 | 136 |
return(ratio) |
137 | 137 |
### Returns the log likelihood ratio. |
138 | 138 |
}, ex=function(){ |
139 |
# LOD score for 2 set of values
|
|
139 |
# LLR score for 2 set of values
|
|
140 | 140 |
mean1=5; sd1=2; card2 = 250 |
141 | 141 |
mean2=6; sd2=3; card1 = 200 |
142 | 142 |
x1 = rnorm(card1, mean1, sd1) |
... | ... | |
273 | 273 |
|
274 | 274 |
|
275 | 275 |
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes. |
276 |
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
|
|
276 |
### This function aggregates nucleosomes from replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center (dyad). A chain of nucleosomes is builts across all replicates. Adjacent nucleosomes of the chain are compared two by two. Comparison is based on a log likelihood ratio (LLR1). depending on the LLR1 value nucleosomes are merged (low LLR) or separated (high LLR). Finally the function returns a list of clusters and all computed llr_scores. Each cluster ows an attribute wp for “well positioned”. This attribute is set to TRUE if the cluster is composed of exactly one nucleosome of each sample.
|
|
277 | 277 |
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=..., end=..., chr=..., genome=...)}. |
278 |
llr_thres=20, ##<< Log likelihood ration threshold.
|
|
278 |
llr_thres=20, ##<< Log likelihood ratio threshold to decide between merging and separating
|
|
279 | 279 |
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound. |
280 | 280 |
){ |
281 | 281 |
end_of_tracks = function(tracks) { |
... | ... | |
487 | 487 |
} |
488 | 488 |
|
489 | 489 |
align_inter_strain_nucs = structure(function(# Aligns nucleosomes between 2 strains. |
490 |
### This function aligns nucs between two strains for a given genome region. |
|
490 |
### This function aligns nucleosomes between two strains for a given genome region.
|
|
491 | 491 |
replicates, ##<< Set of replicates, ideally 3 per strain. |
492 |
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
|
|
493 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
|
|
492 |
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's NULL this list will be computed.
|
|
493 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's NULL this list will be computed.
|
|
494 | 494 |
corr_thres=0.5, ##<< Correlation threshold. |
495 |
llr_thres=100, ##<< LOD cut off.
|
|
495 |
llr_thres=100, ##<< Log likelihood ratio threshold to decide between merging and separating
|
|
496 | 496 |
config=NULL, ##<< GLOBAL config variable |
497 | 497 |
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed. |
498 | 498 |
) { |
... | ... | |
575 | 575 |
reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1)) |
576 | 576 |
llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2)) |
577 | 577 |
llr_scores = c(llr_scores, llr_score) |
578 |
# Filtering on LOD Score
|
|
578 |
# Filtering on LLR Score
|
|
579 | 579 |
if (llr_score < llr_thres) { |
580 | 580 |
tmp_nuc = list() |
581 | 581 |
# strain_ref1 |
... | ... | |
809 | 809 |
return(non_inter_fuzzy) |
810 | 810 |
} |
811 | 811 |
|
812 |
union_regions = function(# Aggregate regions that intersect themnselves.
|
|
812 |
union_regions = function(# Aggregate regions that intersect themselves. |
|
813 | 813 |
### This function is based on sort of lower bounds to detect regions that intersect. We compare lower bound and upper bound of the porevious item. This function embed a while loop and break break regions list become stable. |
814 | 814 |
regions ##<< The Regions to be aggregated |
815 | 815 |
) { |
... | ... | |
862 | 862 |
# } |
863 | 863 |
|
864 | 864 |
translate_regions = function(# Translate a list of regions from a strain ref to another. |
865 |
### This function is an eloborated call to translate_cur.
|
|
865 |
### This function is an elaborated call to translate_cur.
|
|
866 | 866 |
regions, ##<< Regions to be translated. |
867 | 867 |
combi, ##<< Combination of strains. |
868 | 868 |
cur_index, ##<< The region of interest index. |
... | ... | |
964 | 964 |
return(all_reads) |
965 | 965 |
} |
966 | 966 |
|
967 |
get_design = function(# Build the design for deseq
|
|
967 |
get_design = function(# Build the design for DESeq
|
|
968 | 968 |
### This function build the design according sample properties. |
969 | 969 |
marker, ##<< The marker to considere. |
970 | 970 |
combi, ##<< The starin combination to considere. |
... | ... | |
1006 | 1006 |
} |
1007 | 1007 |
|
1008 | 1008 |
plot_dist_samples = function(# Plot the distribution of reads. |
1009 |
### This fuxntion use the deseq nomalization feature to compare qualitatively the distribution.
|
|
1009 |
### This fuxntion use the DESeq nomalization feature to compare qualitatively the distribution.
|
|
1010 | 1010 |
strain, ##<< The strain to considere. |
1011 | 1011 |
marker, ##<< The marker to considere. |
1012 | 1012 |
res, ##<< Data |
... | ... | |
1035 | 1035 |
legend("topright", col=(1:length(sample_ids))+1, lty=1:length(sample_ids), legend=cols) |
1036 | 1036 |
} |
1037 | 1037 |
|
1038 |
analyse_design = function(# Launch deseq methods.
|
|
1039 |
### This function is based on deseq example. It mormalizes data, fit data to GLM model with and without interaction term and compare the two l;=models.
|
|
1040 |
snep_design, ##<< The design to considere.
|
|
1041 |
reads ##<< The data to considere.
|
|
1038 |
analyse_design = function(# Launch DESeq methods.
|
|
1039 |
### This function is based on DESeq example. It normalizes data, fit data to GLM model with and without interaction term and compares the two models.
|
|
1040 |
snep_design, ##<< The design to consider. |
|
1041 |
reads ##<< The data to consider. |
|
1042 | 1042 |
) { |
1043 | 1043 |
snep_count_table = reads[, rownames(snep_design)] |
1044 | 1044 |
cdsFull = newCountDataSet(snep_count_table, snep_design) |
... | ... | |
1065 | 1065 |
combi, ##<< The strain combination involved. |
1066 | 1066 |
form, ##<< the nuc form involved. |
1067 | 1067 |
all_samples, ##<< Global list of samples. |
1068 |
FDR = 0.0001, ## the specific False Discover Rate |
|
1068 | 1069 |
config=NULL ##<< GLOBAL config variable |
1069 | 1070 |
) { |
1070 | 1071 |
# PRETREAT |
... | ... | |
1082 | 1083 |
reads$pvalsGLM = signif(tmp_analyse[[4]], 5) |
1083 | 1084 |
snep_design = tmp_analyse[[3]] |
1084 | 1085 |
# print(snep_design) |
1085 |
fdr = 0.0001 |
|
1086 |
thres = FDR(reads$pvalsGLM, fdr) |
|
1086 |
thres = FDR(reads$pvalsGLM, FDR) |
|
1087 | 1087 |
reads$snep_index = reads$pvalsGLM < thres |
1088 | 1088 |
print(paste(sum(reads$snep_index), " SNEPs found for ", length(reads[,1])," nucs and ", fdr*100,"% of FDR.", sep = "")) |
1089 | 1089 |
return(reads) |
... | ... | |
1119 | 1119 |
|
1120 | 1120 |
|
1121 | 1121 |
ROM2ARAB = function(# Roman to Arabic pair list. |
1122 |
### Util to convert Roman to Arabic
|
|
1122 |
### Utility to convert Roman numbers into Arabic numbers
|
|
1123 | 1123 |
){list( |
1124 | 1124 |
"I" = 1, |
1125 | 1125 |
"II" = 2, |
... | ... | |
1159 | 1159 |
}) |
1160 | 1160 |
|
1161 | 1161 |
ARAB2ROM = function(# Arabic to Roman pair list. |
1162 |
### Util to convert Arabicto Roman
|
|
1162 |
### Utility to convert Arabic numbers to Roman numbers
|
|
1163 | 1163 |
){switch_pairlist(ROM2ARAB())} |
1164 | 1164 |
|
1165 | 1165 |
|
1166 | 1166 |
|
1167 | 1167 |
|
1168 | 1168 |
c2c_extraction = function(# Extract a sub part of the corresponding c2c file |
1169 |
### This fonction allow to acces to a specific part of the c2c file.
|
|
1169 |
### This fonction allows to access to a specific part of the c2c file.
|
|
1170 | 1170 |
strain1, ##<< the key strain |
1171 | 1171 |
strain2, ##<< the target strain |
1172 |
chr=NULL, ##<< if defined, the c2c will filtered according to the chromosome value |
|
1173 |
lower_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome upper than lower_bound |
|
1174 |
upper_bound=NULL, ##<< if defined, the c2c will filtered for part of the genome lower than upper_bound |
|
1172 |
chr=NULL, ##<< if defined, the c2c will be filtered according to the chromosome value
|
|
1173 |
lower_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome upper than lower_bound
|
|
1174 |
upper_bound=NULL, ##<< if defined, the c2c will be filtered for part of the genome lower than upper_bound
|
|
1175 | 1175 |
config=NULL##<< GLOBAL config variable |
1176 | 1176 |
) { |
1177 | 1177 |
reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
... | ... | |
1656 | 1656 |
} |
1657 | 1657 |
|
1658 | 1658 |
build_replicates = structure(function(# Stage replicates data |
1659 |
### This function loads in memory data corresponding to the given experiments. |
|
1660 |
expe, ##<< a list of vector corresponding to vector of replicates.
|
|
1659 |
### This function loads in memory the data corresponding to the given experiments.
|
|
1660 |
expe, ##<< a list of vectors corresponding to replicates.
|
|
1661 | 1661 |
roi, ##<< the region that we are interested in. |
1662 | 1662 |
only_fetch=FALSE, ##<< filter or not inputs. |
1663 | 1663 |
get_genome=FALSE,##<< Load or not corresponding genome. |
... | ... | |
1705 | 1705 |
# library(nucleominer) |
1706 | 1706 |
# |
1707 | 1707 |
# # Read config file |
1708 |
# json_conf_file = "nucleo_miner_config.json"
|
|
1708 |
# json_conf_file = "nucleominer_config.json" |
|
1709 | 1709 |
# config = fromJSON(paste(readLines(json_conf_file), collapse="")) |
1710 | 1710 |
# # Read sample file |
1711 | 1711 |
# all_samples = get_content(config$CSV_SAMPLE_FILE, "cvs", sep=";", head=TRUE, stringsAsFactors=FALSE) |
... | ... | |
2017 | 2017 |
tmp_track_prev = tmp_track[-length(tmp_track)] |
2018 | 2018 |
tmp_track_next = tmp_track[-1] |
2019 | 2019 |
# tmp_track_inter = signif(tmp_track_prev - tmp_track_next) * (abs(tmp_track_prev - tmp_track_next) > 1) * 25 |
2020 |
if (is.null(config$TRACK_LOD_OFFSET)) {
|
|
2021 |
config$TRACK_LOD_OFFSET = 0
|
|
2020 |
if (is.null(config$TRACK_LLR_OFFSET)) {
|
|
2021 |
config$TRACK_LLR_OFFSET = 0
|
|
2022 | 2022 |
} |
2023 |
tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LOD_OFFSET * 25
|
|
2023 |
tmp_track_inter = signif(tmp_track_prev - tmp_track_next) + config$TRACK_LLR_OFFSET * 25
|
|
2024 | 2024 |
tmp_x_prev = tmp_x[-length(tmp_x)] |
2025 | 2025 |
tmp_x_next = tmp_x[-1] |
2026 | 2026 |
need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50}) |
... | ... | |
2038 | 2038 |
points(tmp_x, tmp_y, cex=4, pch=16, col="white") |
2039 | 2039 |
points(tmp_x, tmp_y, cex=4, lwd=2) |
2040 | 2040 |
text(tmp_x, tmp_y, 1:nrow(tf_nucs)) |
2041 |
if (is.null(config$LEGEND_LOD_POS)) {
|
|
2041 |
if (is.null(config$LEGEND_LLR_POS)) {
|
|
2042 | 2042 |
pos = 2 |
2043 | 2043 |
} else { |
2044 |
pos = config$LEGEND_LOD_POS
|
|
2044 |
pos = config$LEGEND_LLR_POS
|
|
2045 | 2045 |
} |
2046 | 2046 |
col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")}) |
2047 | 2047 |
text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) |
Formats disponibles : Unified diff