Révision 1d833b97 src/R/nucleominer.R
b/src/R/nucleominer.R | ||
---|---|---|
454 | 454 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed. |
455 | 455 |
corr_thres=0.5, ##<< Correlation threshold. |
456 | 456 |
lod_thres=-100, ##<< LOD cut off. |
457 |
config=NULL, ##<< GLOBAL config variable |
|
457 | 458 |
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed. |
458 | 459 |
) { |
459 | 460 |
if (length(replicates) < 2) { |
... | ... | |
482 | 483 |
if (is.null(wp_nucs_strain_ref2)) { |
483 | 484 |
wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]] |
484 | 485 |
} |
486 |
# foo <<- wp_nucs_strain_ref1 |
|
487 |
# print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)})) |
|
488 |
# print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)})) |
|
485 | 489 |
# dealing with matching_nas |
486 | 490 |
lws = c() |
487 | 491 |
ups = c() |
... | ... | |
500 | 504 |
# Filtering on Well Positionned |
501 | 505 |
if (nuc_strain_ref1$wp) { |
502 | 506 |
roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1) |
503 |
# print(roi_strain_ref1) |
|
504 |
roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi) |
|
505 |
if (!is.null(roi_strain_ref2)){ |
|
507 |
roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi, config) |
|
508 |
if (!is.null(roi_strain_ref2)){ |
|
506 | 509 |
# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER) |
507 | 510 |
lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin) |
508 | 511 |
upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin) |
... | ... | |
609 | 612 |
} |
610 | 613 |
### Returns a list of clusterized nucleosomes, and all computed lod scores. |
611 | 614 |
}, ex=function(){ |
615 |
|
|
616 |
# Define new translate_roi function... |
|
617 |
translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) { |
|
618 |
return(roi) |
|
619 |
} |
|
620 |
# Binding it by uncomment follwing lines. |
|
621 |
unlockBinding("translate_roi", as.environment("package:nucleominer")) |
|
622 |
unlockBinding("translate_roi", getNamespace("nucleominer")) |
|
623 |
assign("translate_roi", translate_roi, "package:nucleominer") |
|
624 |
assign("translate_roi", translate_roi, getNamespace("nucleominer")) |
|
625 |
lockBinding("translate_roi", getNamespace("nucleominer")) |
|
626 |
lockBinding("translate_roi", as.environment("package:nucleominer")) |
|
627 |
|
|
612 | 628 |
# Dealing with a region of interest |
613 | 629 |
roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1") |
614 | 630 |
roi2 = translate_roi(roi, roi$strain_ref1) |
... | ... | |
639 | 655 |
print(align_inter_strain_nucs(replicates)) |
640 | 656 |
}) |
641 | 657 |
|
642 |
translate_roi = structure(function(# Translate coords of a genome region. |
|
643 |
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example. |
|
644 |
roi, ##<< Original genome region of interest. |
|
645 |
strain2, ##<< The strain in wich you want the genome region of interest. |
|
646 |
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed. |
|
647 |
) { |
|
648 |
### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion. |
|
649 |
strain1 = roi$strain_ref |
|
650 |
if (strain1 == strain2 | strain2 == "strain_ex2") { |
|
651 |
return(roi) |
|
652 |
} else { |
|
653 |
stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a |
|
654 |
look in the translate_roi example.") |
|
655 |
} |
|
656 |
### The translated genome region of interest. |
|
657 |
}, ex=function(){ |
|
658 |
# Define new translate_roi function... |
|
659 |
translate_roi = function(roi, strain2) { |
|
660 |
strain1 = roi$strain_ref |
|
661 |
if (strain1 == strain2) { |
|
662 |
return(roi) |
|
663 |
} else { |
|
664 |
stop("Here is my new translate_roi function...") |
|
665 |
} |
|
666 |
} |
|
667 |
# Binding it by uncomment follwing lines. |
|
668 |
# unlockBinding("translate_roi", as.environment("package:nm")) |
|
669 |
# unlockBinding("translate_roi", getNamespace("nm")) |
|
670 |
# assign("translate_roi", translate_roi, "package:nm") |
|
671 |
# assign("translate_roi", translate_roi, getNamespace("nm")) |
|
672 |
# lockBinding("translate_roi", getNamespace("nm")) |
|
673 |
# lockBinding("translate_roi", as.environment("package:nm")) |
|
674 |
}) |
|
675 |
|
|
676 |
|
|
677 | 658 |
|
678 | 659 |
|
679 | 660 |
|
... | ... | |
704 | 685 |
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids. |
705 | 686 |
roi, ##<< Region of interest. |
706 | 687 |
all_samples, ##<< Global list of samples. |
707 |
config, ##<< GLOBAL config variable |
|
688 |
config=NULL, ##<< GLOBAL config variable
|
|
708 | 689 |
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking. |
709 | 690 |
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence. |
710 | 691 |
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files. |
... | ... | |
713 | 694 |
samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id) |
714 | 695 |
for (i in samples_ids) { |
715 | 696 |
sample = as.list(all_samples[all_samples$id==i,]) |
716 |
sample$roi = translate_roi(roi, sample$strain) |
|
697 |
sample$roi = translate_roi(roi, sample$strain, config)
|
|
717 | 698 |
if (get_genome) { |
718 | 699 |
# Get Genome |
719 |
spl = function(l) { |
|
720 |
ret = list() |
|
721 |
for (name in names(l)) { |
|
722 |
ret[[as.character(l[[name]])]] = name |
|
723 |
} |
|
724 |
ret |
|
725 |
} |
|
726 |
sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[spl(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
|
700 |
sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end] |
|
727 | 701 |
} |
728 | 702 |
# Get inputs |
729 | 703 |
sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) |
... | ... | |
856 | 830 |
regions, ##<< Regions to be translated. |
857 | 831 |
combi, ##<< Combination of strains. |
858 | 832 |
roi_index, ##<< The region of interest index. |
833 |
config=NULL, ##<< GLOBAL config variable |
|
859 | 834 |
roi ##<< The region of interest. |
860 | 835 |
) { |
861 | 836 |
tr_regions = apply(t(1:length(regions[,1])), 2, function(i) { |
862 | 837 |
tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2]) |
863 |
big_roi = translate_roi(roi, tmp_regions_ref2$strain_ref) |
|
838 |
big_roi = translate_roi(roi, tmp_regions_ref2$strain_ref, config)
|
|
864 | 839 |
tmp_min = min(big_roi$begin, big_roi$end) |
865 | 840 |
tmp_max = max(big_roi$begin, big_roi$end) |
866 | 841 |
big_roi$begin = tmp_min |
867 | 842 |
big_roi$end = tmp_max |
868 |
trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], big_roi) |
|
843 |
trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config, big_roi)
|
|
869 | 844 |
data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index)) |
870 | 845 |
}) |
871 | 846 |
tr_regions = do.call("rbind", tr_regions) |
... | ... | |
902 | 877 |
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function. |
903 | 878 |
tmp_fuzzy_nucs, ##<< the regiuons to be croped. |
904 | 879 |
roi, ##<< The region of interest. |
905 |
strain ##<< The strain to consider. |
|
880 |
strain, ##<< The strain to consider. |
|
881 |
config=NULL ##<< GLOBAL config variable |
|
906 | 882 |
) { |
907 |
tr_roi = translate_roi(roi, strain) |
|
883 |
tr_roi = translate_roi(roi, strain, config)
|
|
908 | 884 |
tr_roi_begin = min(tr_roi$begin, tr_roi$end) |
909 | 885 |
tr_roi_end = max(tr_roi$begin, tr_roi$end) |
910 | 886 |
if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) { |
... | ... | |
930 | 906 |
roi, ##<< The region of interest. |
931 | 907 |
roi_index, ##<< The region of interest index. |
932 | 908 |
strain_maps, ##<< Nuc maps. |
933 |
common_nuc_results ##<< Common wp nuc maps |
|
909 |
common_nuc_results, ##<< Common wp nuc maps |
|
910 |
config=NULL ##<< GLOBAL config variable |
|
934 | 911 |
) { |
935 | 912 |
print(roi_index) |
936 | 913 |
PLOT = FALSE |
... | ... | |
939 | 916 |
|
940 | 917 |
print(paste("Dealing with fuzzy from", combi[1])) |
941 | 918 |
tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1]) |
942 |
tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1]) |
|
919 |
tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
|
|
943 | 920 |
if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)} |
944 | 921 |
agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1) |
945 | 922 |
if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) { |
... | ... | |
950 | 927 |
tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2]) |
951 | 928 |
if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)} |
952 | 929 |
agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2) |
953 |
agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2]) |
|
930 |
agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
|
|
954 | 931 |
tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi) |
955 |
tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2]) |
|
932 |
tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
|
|
956 | 933 |
# tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2) |
957 | 934 |
if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) { |
958 | 935 |
lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2) |
... | ... | |
1188 | 1165 |
|
1189 | 1166 |
|
1190 | 1167 |
|
1168 |
ROM2ARAB = function(# Roman to Arabic pair list. |
|
1169 |
### Util to convert Roman to Arabic |
|
1170 |
){list( |
|
1171 |
"I" = 1, |
|
1172 |
"II" = 2, |
|
1173 |
"III" = 3, |
|
1174 |
"IV" = 4, |
|
1175 |
"V" = 5, |
|
1176 |
"VI" = 6, |
|
1177 |
"VII" = 7, |
|
1178 |
"VIII" = 8, |
|
1179 |
"IX" = 9, |
|
1180 |
"X" = 10, |
|
1181 |
"XI" = 11, |
|
1182 |
"XII" = 12, |
|
1183 |
"XIII" = 13, |
|
1184 |
"XIV" = 14, |
|
1185 |
"XV" = 15, |
|
1186 |
"XVI" = 16, |
|
1187 |
"XVII" = 17, |
|
1188 |
"XVIII" = 18, |
|
1189 |
"XIX" = 19, |
|
1190 |
"XX" = 20 |
|
1191 |
)} |
|
1192 |
|
|
1193 |
switch_pairlist = structure(function(# Switch a pairlist |
|
1194 |
### Take a pairlist key:value and return the switched pairlist value:key. |
|
1195 |
l ##<< The pairlist to switch. |
|
1196 |
) { |
|
1197 |
ret = list() |
|
1198 |
for (name in names(l)) { |
|
1199 |
ret[[as.character(l[[name]])]] = name |
|
1200 |
} |
|
1201 |
ret |
|
1202 |
### The switched pairlist. |
|
1203 |
}, ex=function(){ |
|
1204 |
l = list(key1 = "value1", key2 = "value2") |
|
1205 |
print(switch_pairlist(l)) |
|
1206 |
}) |
|
1207 |
|
|
1208 |
ARAB2ROM = function(# Arabic to Roman pair list. |
|
1209 |
### Util to convert Arabicto Roman |
|
1210 |
){switch_pairlist(ROM2ARAB())} |
|
1211 |
|
|
1212 |
|
|
1213 |
# translate_roi = structure(function(# Translate coords of a genome region. |
|
1214 |
# ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example. |
|
1215 |
# roi, ##<< Original genome region of interest. |
|
1216 |
# strain2, ##<< The strain in wich you want the genome region of interest. |
|
1217 |
# big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed. |
|
1218 |
# ) { |
|
1219 |
# ### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion. |
|
1220 |
# strain1 = roi$strain_ref |
|
1221 |
# if (strain1 == strain2 | strain2 == "strain_ex2") { |
|
1222 |
# return(roi) |
|
1223 |
# } else { |
|
1224 |
# stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a |
|
1225 |
# look in the translate_roi example.") |
|
1226 |
# } |
|
1227 |
# ### The translated genome region of interest. |
|
1228 |
# }, ex=function(){ |
|
1229 |
# # Define new translate_roi function... |
|
1230 |
# translate_roi = function(roi, strain2) { |
|
1231 |
# strain1 = roi$strain_ref |
|
1232 |
# if (strain1 == strain2) { |
|
1233 |
# return(roi) |
|
1234 |
# } else { |
|
1235 |
# stop("Here is my new translate_roi function...") |
|
1236 |
# } |
|
1237 |
# } |
|
1238 |
# # Binding it by uncomment follwing lines. |
|
1239 |
# # unlockBinding("translate_roi", as.environment("package:nm")) |
|
1240 |
# # unlockBinding("translate_roi", getNamespace("nm")) |
|
1241 |
# # assign("translate_roi", translate_roi, "package:nm") |
|
1242 |
# # assign("translate_roi", translate_roi, getNamespace("nm")) |
|
1243 |
# # lockBinding("translate_roi", getNamespace("nm")) |
|
1244 |
# # lockBinding("translate_roi", as.environment("package:nm")) |
|
1245 |
# }) |
|
1246 |
# |
|
1247 |
# |
|
1248 |
|
|
1249 |
|
|
1250 |
translate_roi = structure(function(# Translate coords of a genome region. |
|
1251 |
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example. |
|
1252 |
roi, ##<< Original genome region of interest. |
|
1253 |
strain2, ##<< The strain in wich you want the genome region of interest. |
|
1254 |
config=NULL, ##<< GLOBAL config variable |
|
1255 |
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed. |
|
1256 |
) { |
|
1257 |
strain1 = roi$strain_ref |
|
1258 |
reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
|
1259 |
if (strain1 == strain2) { |
|
1260 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
|
1261 |
return(roi) |
|
1262 |
} |
|
1263 |
# Launch c2c file |
|
1264 |
if (reverse) { |
|
1265 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]) |
|
1266 |
} else { |
|
1267 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]) |
|
1268 |
} |
|
1269 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE) |
|
1270 |
# filtering it |
|
1271 |
c2c = c2c[c2c$V6=="-",] |
|
1272 |
# Reverse |
|
1273 |
if (reverse) { |
|
1274 |
tmp_col = c2c$V1 |
|
1275 |
c2c$V1 = c2c$V7 |
|
1276 |
c2c$V7 = tmp_col |
|
1277 |
tmp_col = c2c$V2 |
|
1278 |
c2c$V2 = c2c$V9 |
|
1279 |
c2c$V9 = tmp_col |
|
1280 |
tmp_col = c2c$V3 |
|
1281 |
c2c$V3 = c2c$V10 |
|
1282 |
c2c$V10 = tmp_col |
|
1283 |
} |
|
1284 |
# Restrict c2c to big_roi |
|
1285 |
# if (FALSE) { |
|
1286 |
if (!is.null(big_roi)) { |
|
1287 |
if (roi$strain_ref == big_roi$strain_ref) { |
|
1288 |
if (strain1 == "BY") { |
|
1289 |
big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="") |
|
1290 |
} else if (strain1 == "RM") { |
|
1291 |
big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="") |
|
1292 |
} else if (strain1 == "YJM") { |
|
1293 |
big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]] |
|
1294 |
} |
|
1295 |
big_begin_1 = big_roi$begin |
|
1296 |
big_end_1 = big_roi$end |
|
1297 |
c2c = c2c[c2c$V1==big_chro_1,] |
|
1298 |
if (length(c2c[c2c$V3<big_begin_1, 1] > 0)) {c2c[c2c$V3 < big_begin_1,c("V2", "V3") ] = big_begin_1} |
|
1299 |
if (length(c2c[c2c$V2>big_end_1, 1] > 0)) {c2c[c2c$V2 > big_end_1, c("V2", "V3")] = big_end_1} |
|
1300 |
c2c = c2c[c2c$V2 - c2c$V3 != 0,] |
|
1301 |
} else { |
|
1302 |
stop("ERROR, big_roi and roi not in the same strain_ref") |
|
1303 |
} |
|
1304 |
} |
|
1305 |
# Convert initial roi$chr into c2c format |
|
1306 |
if (strain1 == "BY") { |
|
1307 |
chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="") |
|
1308 |
} else if (strain1 == "RM") { |
|
1309 |
chro_1 = paste("supercontig_1.",roi$chr,sep="") |
|
1310 |
} else if (strain1 == "YJM") { |
|
1311 |
chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]] |
|
1312 |
} |
|
1313 |
begin_1 = roi$begin |
|
1314 |
end_1 = roi$end |
|
1315 |
# Computing equivalent strain_2 alignment coordinates |
|
1316 |
if (reverse) { |
|
1317 |
tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),] |
|
1318 |
tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),] |
|
1319 |
} else { |
|
1320 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
|
1321 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,] |
|
1322 |
} |
|
1323 |
# Never happend conditions ... |
|
1324 |
{ |
|
1325 |
if (length(tmptransfostart$V8) == 0) { |
|
1326 |
# begin_1 is between to lines: shift begin_1 to the start of 2nd line. |
|
1327 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,] |
|
1328 |
begin_1 = sort(tmp_c2c$V2)[1] |
|
1329 |
if (reverse) { |
|
1330 |
tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),] |
|
1331 |
} else { |
|
1332 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,] |
|
1333 |
} |
|
1334 |
if (length(tmptransfostart$V8) == 0) { |
|
1335 |
if (!is.null(big_roi)) { |
|
1336 |
return(NULL) |
|
1337 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,] |
|
1338 |
} else { |
|
1339 |
# return(NULL) |
|
1340 |
# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,]) |
|
1341 |
# print(c2c[c2c$V1==chro_1,]) |
|
1342 |
print(tmptransfostart) |
|
1343 |
print(tmptransfostop) |
|
1344 |
stop("Never happend condition 1.") |
|
1345 |
} |
|
1346 |
} |
|
1347 |
} |
|
1348 |
if (length(tmptransfostop$V8) == 0) { |
|
1349 |
# end_1 is between to lines: shift end_1 to the end of 2nd line. |
|
1350 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,] |
|
1351 |
end_1 = sort(tmp_c2c$V3)[length(tmp_c2c$V2)] |
|
1352 |
if (reverse) { |
|
1353 |
tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),] |
|
1354 |
} else { |
|
1355 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,] |
|
1356 |
} |
|
1357 |
if (length(tmptransfostop$V8) == 0) { |
|
1358 |
if (!is.null(big_roi)) { |
|
1359 |
return(NULL) |
|
1360 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,] |
|
1361 |
} else { |
|
1362 |
# return(NULL) |
|
1363 |
print(c2c[c2c$V1==chro_1,]) |
|
1364 |
print(tmptransfostart) |
|
1365 |
print(tmptransfostop) |
|
1366 |
stop("Never happend condition 2.") |
|
1367 |
} |
|
1368 |
} |
|
1369 |
} |
|
1370 |
if (length(tmptransfostart$V8) != 1) { |
|
1371 |
# tmptransfostart = tmptransfostart[1,] |
|
1372 |
# print("many start") |
|
1373 |
# print(c2c[c2c$V1==chro_1,]) |
|
1374 |
tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,] |
|
1375 |
if (length(tmptransfostart$V8) != 1) { |
|
1376 |
# return(NULL) |
|
1377 |
print(tmptransfostart) |
|
1378 |
print(tmptransfostop) |
|
1379 |
stop("Never happend condition 3.") |
|
1380 |
} |
|
1381 |
} |
|
1382 |
if (length(tmptransfostop$V8) != 1) { |
|
1383 |
# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),] |
|
1384 |
# print("many stop") |
|
1385 |
# print(tmptransfostop) |
|
1386 |
# print(roi) |
|
1387 |
tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,] |
|
1388 |
if (length(tmptransfostop$V8) != 1) { |
|
1389 |
# return(NULL) |
|
1390 |
print(tmptransfostart) |
|
1391 |
print(tmptransfostop) |
|
1392 |
stop("Never happend condition 4.") |
|
1393 |
} |
|
1394 |
} |
|
1395 |
if (tmptransfostart$V7 != tmptransfostop$V7) { |
|
1396 |
print(tmptransfostart) |
|
1397 |
print(tmptransfostop) |
|
1398 |
stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.") |
|
1399 |
} |
|
1400 |
} |
|
1401 |
# Deal with strand |
|
1402 |
if (tmptransfostart$V8 == 1) { |
|
1403 |
begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2) |
|
1404 |
end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2) |
|
1405 |
} else { |
|
1406 |
begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2) |
|
1407 |
end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2) |
|
1408 |
} |
|
1409 |
# Build returned roi |
|
1410 |
roi$strain_ref = strain2 |
|
1411 |
if (roi$strain_ref == "BY") { |
|
1412 |
roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]] |
|
1413 |
} else { |
|
1414 |
roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]] |
|
1415 |
} |
|
1416 |
roi$begin = begin_2 |
|
1417 |
roi$end = end_2 |
|
1418 |
if (sign(roi$end - roi$begin) == 0) { |
|
1419 |
roi$length = 1 |
|
1420 |
} else { |
|
1421 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
|
1422 |
} |
|
1423 |
return(roi) |
|
1424 |
}, ex=function(){ |
|
1425 |
# Define new translate_roi function... |
|
1426 |
translate_roi = function(roi, strain2, config) { |
|
1427 |
strain1 = roi$strain_ref |
|
1428 |
if (strain1 == strain2) { |
|
1429 |
return(roi) |
|
1430 |
} else { |
|
1431 |
stop("Here is my new translate_roi function...") |
|
1432 |
} |
|
1433 |
} |
|
1434 |
# Binding it by uncomment follwing lines. |
|
1435 |
# unlockBinding("translate_roi", as.environment("package:nm")) |
|
1436 |
# unlockBinding("translate_roi", getNamespace("nm")) |
|
1437 |
# assign("translate_roi", translate_roi, "package:nm") |
|
1438 |
# assign("translate_roi", translate_roi, getNamespace("nm")) |
|
1439 |
# lockBinding("translate_roi", getNamespace("nm")) |
|
1440 |
# lockBinding("translate_roi", as.environment("package:nm")) |
|
1441 |
}) |
|
1442 |
|
|
1443 |
|
|
1444 |
|
|
1445 |
|
|
1446 |
|
|
1447 |
|
|
1448 |
|
|
1449 |
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR) |
|
1450 |
### CURs are regions that can be aligned between the genomes |
|
1451 |
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR |
|
1452 |
min_cur_width = 200, ##<< The minimum width of a CUR |
|
1453 |
config=NULL, ##<< GLOBAL config variable |
|
1454 |
plot = FALSE ##<< Plot CURs or not |
|
1455 |
) { |
|
1456 |
get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) { |
|
1457 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]) |
|
1458 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE) |
|
1459 |
# Filtering unagapped |
|
1460 |
c2c = c2c[c2c$V6=="-",] |
|
1461 |
# filtering some things (chr...) |
|
1462 |
# c2c = c2c[c2c$V1 == "chrIV",] |
|
1463 |
diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] |
|
1464 |
diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] |
|
1465 |
# Plot diffs to define a threshold (diff_allowed) |
|
1466 |
# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100)) |
|
1467 |
# Filtering |
|
1468 |
indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed) |
|
1469 |
indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1)) |
|
1470 |
|
|
1471 |
rois = NULL |
|
1472 |
for(i in 1:length(indexes_start)) { |
|
1473 |
start = indexes_start[i] |
|
1474 |
stop = indexes_stop[i] |
|
1475 |
sub_c2c = c2c[start:stop,] |
|
1476 |
if (strain1 == "BY") { |
|
1477 |
chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]] |
|
1478 |
} else { |
|
1479 |
chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]] |
|
1480 |
} |
|
1481 |
roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1) |
|
1482 |
roi[["length"]] = roi$end - roi$begin |
|
1483 |
if (roi$length >= min_cur_width) { |
|
1484 |
rois = dfadd(rois,roi) |
|
1485 |
} |
|
1486 |
if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) { |
|
1487 |
print("*************** ERROR, non homogenous region! ********************") |
|
1488 |
} |
|
1489 |
# print(i) |
|
1490 |
# print(roi) |
|
1491 |
# print(sub_c2c) |
|
1492 |
# print("________________________________________________________________") |
|
1493 |
} |
|
1494 |
if (plot) { |
|
1495 |
print(paste(length(indexes_stop), "area of interest.")) |
|
1496 |
# Plot rois |
|
1497 |
fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[[strain1]]) |
|
1498 |
genome = get_content(fasta_ref$filename, "fasta") |
|
1499 |
plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))})))) |
|
1500 |
for (name in names(genome)) { |
|
1501 |
if (strain1 == "BY") { |
|
1502 |
chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="") |
|
1503 |
} else { |
|
1504 |
chr_ref = name |
|
1505 |
} |
|
1506 |
y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]]) |
|
1507 |
lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev)) |
|
1508 |
text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref) |
|
1509 |
} |
|
1510 |
col=1 |
|
1511 |
for (roi_index in 1:length(rois$chr)) { |
|
1512 |
roi = rois[roi_index,] |
|
1513 |
y_lev = as.integer(roi$chr) + 0.3 |
|
1514 |
lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col) |
|
1515 |
text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index) |
|
1516 |
col = col + 1 |
|
1517 |
} |
|
1518 |
} |
|
1519 |
return(rois) |
|
1520 |
} |
|
1521 |
rois = NULL |
|
1522 |
rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
|
1523 |
rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
|
1524 |
for (roi_1_index in 1:length(rois_BY_RM[,1])) { |
|
1525 |
roi_1 = rois_BY_RM[roi_1_index,] |
|
1526 |
roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ; |
|
1527 |
# print(length(roi_2_candidates[,1])) |
|
1528 |
if (length(roi_2_candidates[,1]) > 0) { |
|
1529 |
for(roi_2_index in 1:length(roi_2_candidates[,1])) { |
|
1530 |
roi_2 = roi_2_candidates[roi_2_index,] |
|
1531 |
roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY") |
|
1532 |
roi[["length"]] = roi$end - roi$begin + 1 |
|
1533 |
if (roi$length >= min_cur_width) { |
|
1534 |
# if (length(rois[,1]) == 153) { |
|
1535 |
# print(paste(length(rois[,1]), roi_1_index, roi_2_index )) |
|
1536 |
# print(roi_1) |
|
1537 |
# print(roi_2) |
|
1538 |
# print(roi) |
|
1539 |
# } |
|
1540 |
rois = dfadd(rois,roi) |
|
1541 |
} |
|
1542 |
} |
|
1543 |
} |
|
1544 |
} |
|
1545 |
print(length(rois[,1])) |
|
1546 |
print(sum(rois$length)) |
|
1547 |
rois_1st_round = rois |
|
1548 |
rois_2nd_round = NULL |
|
1549 |
rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed) |
|
1550 |
for (roi_1_index in 1:length(rois_1st_round[,1])) { |
|
1551 |
roi_1 = rois_1st_round[roi_1_index,] |
|
1552 |
translated_roi_1 = translate_roi(roi_1, "RM", config) |
|
1553 |
t_b = min(translated_roi_1$begin, translated_roi_1$end) |
|
1554 |
t_e = max(translated_roi_1$begin, translated_roi_1$end) |
|
1555 |
roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ; |
|
1556 |
if (length(roi_2_candidates[,1]) > 0) { |
|
1557 |
for(roi_2_index in 1:length(roi_2_candidates[,1])) { |
|
1558 |
roi_2 = roi_2_candidates[roi_2_index,] |
|
1559 |
roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM") |
|
1560 |
roi[["length"]] = roi$end - roi$begin + 1 |
|
1561 |
if (roi$length >= min_cur_width) { |
|
1562 |
rois_2nd_round = dfadd(rois_2nd_round,roi) |
|
1563 |
} |
|
1564 |
} |
|
1565 |
} |
|
1566 |
} |
|
1567 |
print(length(rois_2nd_round[,1])) |
|
1568 |
print(sum(rois_2nd_round$length)) |
|
1569 |
rois = rois_2nd_round |
|
1570 |
|
|
1571 |
rois_translator_round = list() |
|
1572 |
for (roi_index in 1:length(rois[,1])) { |
|
1573 |
roi = rois[roi_index,] |
|
1574 |
BY_roi = translate_roi(roi, "BY", config) |
|
1575 |
tmp_BY_roi = BY_roi |
|
1576 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1577 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1578 |
BY_roi$length = abs(BY_roi$length) |
|
1579 |
rois_translator_round = dfadd(rois_translator_round, BY_roi) |
|
1580 |
} |
|
1581 |
rois = rois_translator_round |
|
1582 |
|
|
1583 |
rois_3rd_round = NULL |
|
1584 |
for (roi_index in 1:length(rois[,1])) { |
|
1585 |
# for (roi_index in 1:2) { |
|
1586 |
current_roi = rois[roi_index,] |
|
1587 |
# print(roi_index) |
|
1588 |
|
|
1589 |
to_be_check_rois = dfadd(NULL, current_roi) |
|
1590 |
NEED_RERUN = TRUE |
|
1591 |
while (NEED_RERUN) { |
|
1592 |
# print("RERUN"), |
|
1593 |
NEED_RERUN = FALSE |
|
1594 |
to_be_check_again = NULL |
|
1595 |
for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) { |
|
1596 |
# print(to_be_check_rois) |
|
1597 |
to_be_check_roi = to_be_check_rois[to_be_check_roi_index,] |
|
1598 |
combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM")) |
|
1599 |
for (combi in combis) { |
|
1600 |
# print(combi) |
|
1601 |
strain1 = combi[1] |
|
1602 |
strain2 = combi[2] |
|
1603 |
trans_roi = translate_roi(to_be_check_roi, strain1, config) |
|
1604 |
lower_bound=min(trans_roi$begin, trans_roi$end) |
|
1605 |
upper_bound=max(trans_roi$begin, trans_roi$end) |
|
1606 |
|
|
1607 |
check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) { |
|
1608 |
reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM" |
|
1609 |
if (strain1 == strain2) { |
|
1610 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1 |
|
1611 |
return(roi) |
|
1612 |
} |
|
1613 |
# Launch c2c file |
|
1614 |
if (reverse) { |
|
1615 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]]) |
|
1616 |
} else { |
|
1617 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]]) |
|
1618 |
} |
|
1619 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE) |
|
1620 |
# filtering it |
|
1621 |
c2c = c2c[c2c$V6=="-",] |
|
1622 |
# Reverse |
|
1623 |
if (reverse) { |
|
1624 |
tmp_col = c2c$V1 |
|
1625 |
c2c$V1 = c2c$V7 |
|
1626 |
c2c$V7 = tmp_col |
|
1627 |
tmp_col = c2c$V2 |
|
1628 |
c2c$V2 = c2c$V9 |
|
1629 |
c2c$V9 = tmp_col |
|
1630 |
tmp_col = c2c$V3 |
|
1631 |
c2c$V3 = c2c$V10 |
|
1632 |
c2c$V10 = tmp_col |
|
1633 |
} |
|
1634 |
|
|
1635 |
if (strain1 == "BY") { |
|
1636 |
chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="") |
|
1637 |
} else if (strain1 == "RM") { |
|
1638 |
chro_1 = paste("supercontig_1.",chr,sep="") |
|
1639 |
} else if (strain1 == "YJM") { |
|
1640 |
chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]] |
|
1641 |
} |
|
1642 |
# print(chro_1) |
|
1643 |
if (!is.null(lower_bound) & !is.null(upper_bound)) { |
|
1644 |
if (reverse) { |
|
1645 |
tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),] |
|
1646 |
} else { |
|
1647 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,] |
|
1648 |
} |
|
1649 |
} else { |
|
1650 |
tmp_c2c = c2c[c2c$V1 == chr,] |
|
1651 |
} |
|
1652 |
if (length(tmp_c2c[,1]) > 1) { |
|
1653 |
pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){ |
|
1654 |
# print(paste(i, "/", length(tmp_c2c[,1]))) |
|
1655 |
apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){ |
|
1656 |
l1 = tmp_c2c[i,] |
|
1657 |
b1 = min(l1$V2, l1$V3) |
|
1658 |
e1 = max(l1$V2, l1$V3) |
|
1659 |
l2 = tmp_c2c[j,] |
|
1660 |
b2 = min(l2$V2, l2$V3) |
|
1661 |
e2 = max(l2$V2, l2$V3) |
|
1662 |
if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) { |
|
1663 |
print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep="")) |
|
1664 |
pb = list(strain1, strain2, chr, b1, e1, b2, e2) |
|
1665 |
pb |
|
1666 |
} else { |
|
1667 |
NULL |
|
1668 |
} |
|
1669 |
}) |
|
1670 |
}) |
|
1671 |
return(pbs) |
|
1672 |
} |
|
1673 |
|
|
1674 |
}, ex=function(){ |
|
1675 |
source("src/nucleo_miner/yeast_strain_conversion.R"); |
|
1676 |
pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE) |
|
1677 |
pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE) |
|
1678 |
pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE) |
|
1679 |
pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE) |
|
1680 |
pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE) |
|
1681 |
pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE) |
|
1682 |
}) |
|
1683 |
|
|
1684 |
|
|
1685 |
res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) |
|
1686 |
if (!is.null(res)) { |
|
1687 |
df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE) |
|
1688 |
interval = df_res[1,] |
|
1689 |
inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7))) |
|
1690 |
inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7))) |
|
1691 |
# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2])) |
|
1692 |
new_roi1 = trans_roi |
|
1693 |
new_roi2 = trans_roi |
|
1694 |
new_roi1$begin = lower_bound |
|
1695 |
new_roi1$end = inter_min - 1 |
|
1696 |
new_roi1$length = new_roi1$end - new_roi1$begin + 1 |
|
1697 |
new_roi2$begin = inter_max + 1 |
|
1698 |
new_roi2$end = upper_bound |
|
1699 |
new_roi2$length = new_roi2$end - new_roi2$begin + 1 |
|
1700 |
if (new_roi1$length > min_cur_width) { |
|
1701 |
BY_roi = translate_roi(new_roi1, "BY", config) |
|
1702 |
tmp_BY_roi = BY_roi |
|
1703 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1704 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1705 |
BY_roi$length = abs(BY_roi$length) |
|
1706 |
to_be_check_again = dfadd(to_be_check_again, BY_roi) |
|
1707 |
} |
|
1708 |
if (new_roi2$length > min_cur_width) { |
|
1709 |
BY_roi = translate_roi(new_roi2, "BY", config) |
|
1710 |
tmp_BY_roi = BY_roi |
|
1711 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1712 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end) |
|
1713 |
BY_roi$length = abs(BY_roi$length) |
|
1714 |
to_be_check_again = dfadd(to_be_check_again, BY_roi) |
|
1715 |
} |
|
1716 |
if (to_be_check_roi_index < length(to_be_check_rois[,1])) { |
|
1717 |
for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) { |
|
1718 |
to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,]) |
|
1719 |
} |
|
1720 |
} |
|
1721 |
NEED_RERUN = TRUE |
|
1722 |
break |
|
1723 |
} |
|
1724 |
} |
|
1725 |
if (NEED_RERUN) { |
|
1726 |
to_be_check_rois = to_be_check_again |
|
1727 |
break |
|
1728 |
} |
|
1729 |
} |
|
1730 |
} |
|
1731 |
|
|
1732 |
checked_rois = to_be_check_rois |
|
1733 |
for (checked_roi_index in 1:length(checked_rois[,1])) { |
|
1734 |
rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,]) |
|
1735 |
} |
|
1736 |
} |
|
1737 |
|
|
1738 |
print(length(rois_3rd_round[,1])) |
|
1739 |
print(sum(rois_3rd_round$length)) |
|
1740 |
rois = rois_3rd_round |
|
1741 |
|
|
1742 |
|
|
1743 |
if (plot) { |
|
1744 |
print(paste(length(rois$chr), "area of interest.")) |
|
1745 |
# Plot rois |
|
1746 |
fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[["BY"]]) |
|
1747 |
genome = get_content(fasta_ref$filename, "fasta") |
|
1748 |
plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))})))) |
|
1749 |
for (name in names(genome)) { |
|
1750 |
if (TRUE) { |
|
1751 |
chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="") |
|
1752 |
} else { |
|
1753 |
chr_ref = name |
|
1754 |
} |
|
1755 |
y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]]) |
|
1756 |
lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev)) |
|
1757 |
text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref) |
|
1758 |
} |
|
1759 |
col=1 |
|
1760 |
for (roi_index in 1:length(rois$chr)) { |
|
1761 |
roi = rois[roi_index,] |
|
1762 |
y_lev = as.integer(roi$chr) + 0.3 |
|
1763 |
lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col) |
|
1764 |
text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index) |
|
1765 |
col = col + 1 |
|
1766 |
} |
|
1767 |
} |
|
1768 |
return (rois) |
|
1769 |
} |
|
1770 |
|
|
1771 |
|
|
1772 |
|
|
1773 |
|
|
1774 |
|
|
1775 |
|
|
1776 |
|
|
1777 |
|
|
1778 |
|
|
1779 |
|
|
1780 |
|
|
1781 |
|
|
1782 |
|
|
1783 |
|
|
1784 |
|
|
1785 |
|
|
1786 |
|
|
1787 |
|
|
1788 |
|
|
1789 |
|
|
1790 |
|
|
1791 |
|
|
1792 |
|
|
1793 |
|
|
1794 |
|
|
1795 |
|
|
1796 |
|
|
1797 |
|
|
1798 |
|
|
1799 |
|
|
1800 |
|
|
1801 |
|
|
1802 |
|
|
1803 |
|
|
1804 |
|
|
1805 |
|
|
1806 |
|
|
1807 |
|
|
1191 | 1808 |
|
1192 | 1809 |
|
1193 | 1810 |
|
... | ... | |
1345 | 1962 |
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples. |
1346 | 1963 |
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. |
1347 | 1964 |
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed. |
1348 |
height = 10 ##<< Number of reads in per million read for each sample, graphical parametre for the y axis. |
|
1965 |
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis. |
|
1966 |
config=NULL ##<< GLOBAL config variable |
|
1349 | 1967 |
){ |
1350 | 1968 |
returned_list = list() |
1351 | 1969 |
# Computing global display parameters |
... | ... | |
1513 | 2131 |
|
1514 | 2132 |
if (plot_common_nucs | plot_anovas | plot_anova_boxes) { |
1515 | 2133 |
if (is.null(aligned_inter_strain_nucs)) { |
1516 |
aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]])[[1]] |
|
2134 |
aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
|
|
1517 | 2135 |
} |
1518 | 2136 |
if (plot_common_nucs) { |
1519 | 2137 |
#Plot common wp nucs |
Formats disponibles : Unified diff