454 |
454 |
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
|
455 |
455 |
corr_thres=0.5, ##<< Correlation threshold.
|
456 |
456 |
lod_thres=-100, ##<< LOD cut off.
|
|
457 |
config=NULL, ##<< GLOBAL config variable
|
457 |
458 |
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
|
458 |
459 |
) {
|
459 |
460 |
if (length(replicates) < 2) {
|
... | ... | |
482 |
483 |
if (is.null(wp_nucs_strain_ref2)) {
|
483 |
484 |
wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
|
484 |
485 |
}
|
|
486 |
# foo <<- wp_nucs_strain_ref1
|
|
487 |
# print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
|
|
488 |
# print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
|
485 |
489 |
# dealing with matching_nas
|
486 |
490 |
lws = c()
|
487 |
491 |
ups = c()
|
... | ... | |
500 |
504 |
# Filtering on Well Positionned
|
501 |
505 |
if (nuc_strain_ref1$wp) {
|
502 |
506 |
roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
|
503 |
|
# print(roi_strain_ref1)
|
504 |
|
roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi)
|
505 |
|
if (!is.null(roi_strain_ref2)){
|
|
507 |
roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi, config)
|
|
508 |
if (!is.null(roi_strain_ref2)){
|
506 |
509 |
# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
|
507 |
510 |
lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
|
508 |
511 |
upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
|
... | ... | |
609 |
612 |
}
|
610 |
613 |
### Returns a list of clusterized nucleosomes, and all computed lod scores.
|
611 |
614 |
}, ex=function(){
|
|
615 |
|
|
616 |
# Define new translate_roi function...
|
|
617 |
translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
|
|
618 |
return(roi)
|
|
619 |
}
|
|
620 |
# Binding it by uncomment follwing lines.
|
|
621 |
unlockBinding("translate_roi", as.environment("package:nucleominer"))
|
|
622 |
unlockBinding("translate_roi", getNamespace("nucleominer"))
|
|
623 |
assign("translate_roi", translate_roi, "package:nucleominer")
|
|
624 |
assign("translate_roi", translate_roi, getNamespace("nucleominer"))
|
|
625 |
lockBinding("translate_roi", getNamespace("nucleominer"))
|
|
626 |
lockBinding("translate_roi", as.environment("package:nucleominer"))
|
|
627 |
|
612 |
628 |
# Dealing with a region of interest
|
613 |
629 |
roi =list(name="example", begin=1000, end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
|
614 |
630 |
roi2 = translate_roi(roi, roi$strain_ref1)
|
... | ... | |
639 |
655 |
print(align_inter_strain_nucs(replicates))
|
640 |
656 |
})
|
641 |
657 |
|
642 |
|
translate_roi = structure(function(# Translate coords of a genome region.
|
643 |
|
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
|
644 |
|
roi, ##<< Original genome region of interest.
|
645 |
|
strain2, ##<< The strain in wich you want the genome region of interest.
|
646 |
|
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
|
647 |
|
) {
|
648 |
|
### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion.
|
649 |
|
strain1 = roi$strain_ref
|
650 |
|
if (strain1 == strain2 | strain2 == "strain_ex2") {
|
651 |
|
return(roi)
|
652 |
|
} else {
|
653 |
|
stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a
|
654 |
|
look in the translate_roi example.")
|
655 |
|
}
|
656 |
|
### The translated genome region of interest.
|
657 |
|
}, ex=function(){
|
658 |
|
# Define new translate_roi function...
|
659 |
|
translate_roi = function(roi, strain2) {
|
660 |
|
strain1 = roi$strain_ref
|
661 |
|
if (strain1 == strain2) {
|
662 |
|
return(roi)
|
663 |
|
} else {
|
664 |
|
stop("Here is my new translate_roi function...")
|
665 |
|
}
|
666 |
|
}
|
667 |
|
# Binding it by uncomment follwing lines.
|
668 |
|
# unlockBinding("translate_roi", as.environment("package:nm"))
|
669 |
|
# unlockBinding("translate_roi", getNamespace("nm"))
|
670 |
|
# assign("translate_roi", translate_roi, "package:nm")
|
671 |
|
# assign("translate_roi", translate_roi, getNamespace("nm"))
|
672 |
|
# lockBinding("translate_roi", getNamespace("nm"))
|
673 |
|
# lockBinding("translate_roi", as.environment("package:nm"))
|
674 |
|
})
|
675 |
|
|
676 |
|
|
677 |
658 |
|
678 |
659 |
|
679 |
660 |
|
... | ... | |
704 |
685 |
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
|
705 |
686 |
roi, ##<< Region of interest.
|
706 |
687 |
all_samples, ##<< Global list of samples.
|
707 |
|
config, ##<< GLOBAL config variable
|
|
688 |
config=NULL, ##<< GLOBAL config variable
|
708 |
689 |
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
|
709 |
690 |
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
|
710 |
691 |
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
|
... | ... | |
713 |
694 |
samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
|
714 |
695 |
for (i in samples_ids) {
|
715 |
696 |
sample = as.list(all_samples[all_samples$id==i,])
|
716 |
|
sample$roi = translate_roi(roi, sample$strain)
|
|
697 |
sample$roi = translate_roi(roi, sample$strain, config)
|
717 |
698 |
if (get_genome) {
|
718 |
699 |
# Get Genome
|
719 |
|
spl = function(l) {
|
720 |
|
ret = list()
|
721 |
|
for (name in names(l)) {
|
722 |
|
ret[[as.character(l[[name]])]] = name
|
723 |
|
}
|
724 |
|
ret
|
725 |
|
}
|
726 |
|
sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[spl(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
|
|
700 |
sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]
|
727 |
701 |
}
|
728 |
702 |
# Get inputs
|
729 |
703 |
sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE)
|
... | ... | |
856 |
830 |
regions, ##<< Regions to be translated.
|
857 |
831 |
combi, ##<< Combination of strains.
|
858 |
832 |
roi_index, ##<< The region of interest index.
|
|
833 |
config=NULL, ##<< GLOBAL config variable
|
859 |
834 |
roi ##<< The region of interest.
|
860 |
835 |
) {
|
861 |
836 |
tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
|
862 |
837 |
tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
|
863 |
|
big_roi = translate_roi(roi, tmp_regions_ref2$strain_ref)
|
|
838 |
big_roi = translate_roi(roi, tmp_regions_ref2$strain_ref, config)
|
864 |
839 |
tmp_min = min(big_roi$begin, big_roi$end)
|
865 |
840 |
tmp_max = max(big_roi$begin, big_roi$end)
|
866 |
841 |
big_roi$begin = tmp_min
|
867 |
842 |
big_roi$end = tmp_max
|
868 |
|
trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], big_roi)
|
|
843 |
trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config, big_roi)
|
869 |
844 |
data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
|
870 |
845 |
})
|
871 |
846 |
tr_regions = do.call("rbind", tr_regions)
|
... | ... | |
902 |
877 |
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
|
903 |
878 |
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
|
904 |
879 |
roi, ##<< The region of interest.
|
905 |
|
strain ##<< The strain to consider.
|
|
880 |
strain, ##<< The strain to consider.
|
|
881 |
config=NULL ##<< GLOBAL config variable
|
906 |
882 |
) {
|
907 |
|
tr_roi = translate_roi(roi, strain)
|
|
883 |
tr_roi = translate_roi(roi, strain, config)
|
908 |
884 |
tr_roi_begin = min(tr_roi$begin, tr_roi$end)
|
909 |
885 |
tr_roi_end = max(tr_roi$begin, tr_roi$end)
|
910 |
886 |
if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
|
... | ... | |
930 |
906 |
roi, ##<< The region of interest.
|
931 |
907 |
roi_index, ##<< The region of interest index.
|
932 |
908 |
strain_maps, ##<< Nuc maps.
|
933 |
|
common_nuc_results ##<< Common wp nuc maps
|
|
909 |
common_nuc_results, ##<< Common wp nuc maps
|
|
910 |
config=NULL ##<< GLOBAL config variable
|
934 |
911 |
) {
|
935 |
912 |
print(roi_index)
|
936 |
913 |
PLOT = FALSE
|
... | ... | |
939 |
916 |
|
940 |
917 |
print(paste("Dealing with fuzzy from", combi[1]))
|
941 |
918 |
tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
|
942 |
|
tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1])
|
|
919 |
tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
|
943 |
920 |
if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
|
944 |
921 |
agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)
|
945 |
922 |
if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
|
... | ... | |
950 |
927 |
tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
|
951 |
928 |
if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
|
952 |
929 |
agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)
|
953 |
|
agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2])
|
|
930 |
agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
|
954 |
931 |
tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi)
|
955 |
|
tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2])
|
|
932 |
tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
|
956 |
933 |
# tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
|
957 |
934 |
if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
|
958 |
935 |
lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
|
... | ... | |
1188 |
1165 |
|
1189 |
1166 |
|
1190 |
1167 |
|
|
1168 |
ROM2ARAB = function(# Roman to Arabic pair list.
|
|
1169 |
### Util to convert Roman to Arabic
|
|
1170 |
){list(
|
|
1171 |
"I" = 1,
|
|
1172 |
"II" = 2,
|
|
1173 |
"III" = 3,
|
|
1174 |
"IV" = 4,
|
|
1175 |
"V" = 5,
|
|
1176 |
"VI" = 6,
|
|
1177 |
"VII" = 7,
|
|
1178 |
"VIII" = 8,
|
|
1179 |
"IX" = 9,
|
|
1180 |
"X" = 10,
|
|
1181 |
"XI" = 11,
|
|
1182 |
"XII" = 12,
|
|
1183 |
"XIII" = 13,
|
|
1184 |
"XIV" = 14,
|
|
1185 |
"XV" = 15,
|
|
1186 |
"XVI" = 16,
|
|
1187 |
"XVII" = 17,
|
|
1188 |
"XVIII" = 18,
|
|
1189 |
"XIX" = 19,
|
|
1190 |
"XX" = 20
|
|
1191 |
)}
|
|
1192 |
|
|
1193 |
switch_pairlist = structure(function(# Switch a pairlist
|
|
1194 |
### Take a pairlist key:value and return the switched pairlist value:key.
|
|
1195 |
l ##<< The pairlist to switch.
|
|
1196 |
) {
|
|
1197 |
ret = list()
|
|
1198 |
for (name in names(l)) {
|
|
1199 |
ret[[as.character(l[[name]])]] = name
|
|
1200 |
}
|
|
1201 |
ret
|
|
1202 |
### The switched pairlist.
|
|
1203 |
}, ex=function(){
|
|
1204 |
l = list(key1 = "value1", key2 = "value2")
|
|
1205 |
print(switch_pairlist(l))
|
|
1206 |
})
|
|
1207 |
|
|
1208 |
ARAB2ROM = function(# Arabic to Roman pair list.
|
|
1209 |
### Util to convert Arabicto Roman
|
|
1210 |
){switch_pairlist(ROM2ARAB())}
|
|
1211 |
|
|
1212 |
|
|
1213 |
# translate_roi = structure(function(# Translate coords of a genome region.
|
|
1214 |
# ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
|
|
1215 |
# roi, ##<< Original genome region of interest.
|
|
1216 |
# strain2, ##<< The strain in wich you want the genome region of interest.
|
|
1217 |
# big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
|
|
1218 |
# ) {
|
|
1219 |
# ### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion.
|
|
1220 |
# strain1 = roi$strain_ref
|
|
1221 |
# if (strain1 == strain2 | strain2 == "strain_ex2") {
|
|
1222 |
# return(roi)
|
|
1223 |
# } else {
|
|
1224 |
# stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a
|
|
1225 |
# look in the translate_roi example.")
|
|
1226 |
# }
|
|
1227 |
# ### The translated genome region of interest.
|
|
1228 |
# }, ex=function(){
|
|
1229 |
# # Define new translate_roi function...
|
|
1230 |
# translate_roi = function(roi, strain2) {
|
|
1231 |
# strain1 = roi$strain_ref
|
|
1232 |
# if (strain1 == strain2) {
|
|
1233 |
# return(roi)
|
|
1234 |
# } else {
|
|
1235 |
# stop("Here is my new translate_roi function...")
|
|
1236 |
# }
|
|
1237 |
# }
|
|
1238 |
# # Binding it by uncomment follwing lines.
|
|
1239 |
# # unlockBinding("translate_roi", as.environment("package:nm"))
|
|
1240 |
# # unlockBinding("translate_roi", getNamespace("nm"))
|
|
1241 |
# # assign("translate_roi", translate_roi, "package:nm")
|
|
1242 |
# # assign("translate_roi", translate_roi, getNamespace("nm"))
|
|
1243 |
# # lockBinding("translate_roi", getNamespace("nm"))
|
|
1244 |
# # lockBinding("translate_roi", as.environment("package:nm"))
|
|
1245 |
# })
|
|
1246 |
#
|
|
1247 |
#
|
|
1248 |
|
|
1249 |
|
|
1250 |
translate_roi = structure(function(# Translate coords of a genome region.
|
|
1251 |
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
|
|
1252 |
roi, ##<< Original genome region of interest.
|
|
1253 |
strain2, ##<< The strain in wich you want the genome region of interest.
|
|
1254 |
config=NULL, ##<< GLOBAL config variable
|
|
1255 |
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
|
|
1256 |
) {
|
|
1257 |
strain1 = roi$strain_ref
|
|
1258 |
reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
|
|
1259 |
if (strain1 == strain2) {
|
|
1260 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
|
|
1261 |
return(roi)
|
|
1262 |
}
|
|
1263 |
# Launch c2c file
|
|
1264 |
if (reverse) {
|
|
1265 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
|
|
1266 |
} else {
|
|
1267 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
|
|
1268 |
}
|
|
1269 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
|
|
1270 |
# filtering it
|
|
1271 |
c2c = c2c[c2c$V6=="-",]
|
|
1272 |
# Reverse
|
|
1273 |
if (reverse) {
|
|
1274 |
tmp_col = c2c$V1
|
|
1275 |
c2c$V1 = c2c$V7
|
|
1276 |
c2c$V7 = tmp_col
|
|
1277 |
tmp_col = c2c$V2
|
|
1278 |
c2c$V2 = c2c$V9
|
|
1279 |
c2c$V9 = tmp_col
|
|
1280 |
tmp_col = c2c$V3
|
|
1281 |
c2c$V3 = c2c$V10
|
|
1282 |
c2c$V10 = tmp_col
|
|
1283 |
}
|
|
1284 |
# Restrict c2c to big_roi
|
|
1285 |
# if (FALSE) {
|
|
1286 |
if (!is.null(big_roi)) {
|
|
1287 |
if (roi$strain_ref == big_roi$strain_ref) {
|
|
1288 |
if (strain1 == "BY") {
|
|
1289 |
big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="")
|
|
1290 |
} else if (strain1 == "RM") {
|
|
1291 |
big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="")
|
|
1292 |
} else if (strain1 == "YJM") {
|
|
1293 |
big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]]
|
|
1294 |
}
|
|
1295 |
big_begin_1 = big_roi$begin
|
|
1296 |
big_end_1 = big_roi$end
|
|
1297 |
c2c = c2c[c2c$V1==big_chro_1,]
|
|
1298 |
if (length(c2c[c2c$V3<big_begin_1, 1] > 0)) {c2c[c2c$V3 < big_begin_1,c("V2", "V3") ] = big_begin_1}
|
|
1299 |
if (length(c2c[c2c$V2>big_end_1, 1] > 0)) {c2c[c2c$V2 > big_end_1, c("V2", "V3")] = big_end_1}
|
|
1300 |
c2c = c2c[c2c$V2 - c2c$V3 != 0,]
|
|
1301 |
} else {
|
|
1302 |
stop("ERROR, big_roi and roi not in the same strain_ref")
|
|
1303 |
}
|
|
1304 |
}
|
|
1305 |
# Convert initial roi$chr into c2c format
|
|
1306 |
if (strain1 == "BY") {
|
|
1307 |
chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="")
|
|
1308 |
} else if (strain1 == "RM") {
|
|
1309 |
chro_1 = paste("supercontig_1.",roi$chr,sep="")
|
|
1310 |
} else if (strain1 == "YJM") {
|
|
1311 |
chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]]
|
|
1312 |
}
|
|
1313 |
begin_1 = roi$begin
|
|
1314 |
end_1 = roi$end
|
|
1315 |
# Computing equivalent strain_2 alignment coordinates
|
|
1316 |
if (reverse) {
|
|
1317 |
tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
|
|
1318 |
tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),]
|
|
1319 |
} else {
|
|
1320 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
|
|
1321 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
|
|
1322 |
}
|
|
1323 |
# Never happend conditions ...
|
|
1324 |
{
|
|
1325 |
if (length(tmptransfostart$V8) == 0) {
|
|
1326 |
# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
|
|
1327 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
|
|
1328 |
begin_1 = sort(tmp_c2c$V2)[1]
|
|
1329 |
if (reverse) {
|
|
1330 |
tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
|
|
1331 |
} else {
|
|
1332 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
|
|
1333 |
}
|
|
1334 |
if (length(tmptransfostart$V8) == 0) {
|
|
1335 |
if (!is.null(big_roi)) {
|
|
1336 |
return(NULL)
|
|
1337 |
tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
|
|
1338 |
} else {
|
|
1339 |
# return(NULL)
|
|
1340 |
# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,])
|
|
1341 |
# print(c2c[c2c$V1==chro_1,])
|
|
1342 |
print(tmptransfostart)
|
|
1343 |
print(tmptransfostop)
|
|
1344 |
stop("Never happend condition 1.")
|
|
1345 |
}
|
|
1346 |
}
|
|
1347 |
}
|
|
1348 |
if (length(tmptransfostop$V8) == 0) {
|
|
1349 |
# end_1 is between to lines: shift end_1 to the end of 2nd line.
|
|
1350 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
|
|
1351 |
end_1 = sort(tmp_c2c$V3)[length(tmp_c2c$V2)]
|
|
1352 |
if (reverse) {
|
|
1353 |
tmptransfostop = c2c[c2c$V1==chro_1 & ((c2c$V3>=end_1 & c2c$V2<=end_1 & c2c$V8==1) | (c2c$V2>=end_1 & c2c$V3<=end_1 & c2c$V8==-1)),]
|
|
1354 |
} else {
|
|
1355 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
|
|
1356 |
}
|
|
1357 |
if (length(tmptransfostop$V8) == 0) {
|
|
1358 |
if (!is.null(big_roi)) {
|
|
1359 |
return(NULL)
|
|
1360 |
tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
|
|
1361 |
} else {
|
|
1362 |
# return(NULL)
|
|
1363 |
print(c2c[c2c$V1==chro_1,])
|
|
1364 |
print(tmptransfostart)
|
|
1365 |
print(tmptransfostop)
|
|
1366 |
stop("Never happend condition 2.")
|
|
1367 |
}
|
|
1368 |
}
|
|
1369 |
}
|
|
1370 |
if (length(tmptransfostart$V8) != 1) {
|
|
1371 |
# tmptransfostart = tmptransfostart[1,]
|
|
1372 |
# print("many start")
|
|
1373 |
# print(c2c[c2c$V1==chro_1,])
|
|
1374 |
tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
|
|
1375 |
if (length(tmptransfostart$V8) != 1) {
|
|
1376 |
# return(NULL)
|
|
1377 |
print(tmptransfostart)
|
|
1378 |
print(tmptransfostop)
|
|
1379 |
stop("Never happend condition 3.")
|
|
1380 |
}
|
|
1381 |
}
|
|
1382 |
if (length(tmptransfostop$V8) != 1) {
|
|
1383 |
# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),]
|
|
1384 |
# print("many stop")
|
|
1385 |
# print(tmptransfostop)
|
|
1386 |
# print(roi)
|
|
1387 |
tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
|
|
1388 |
if (length(tmptransfostop$V8) != 1) {
|
|
1389 |
# return(NULL)
|
|
1390 |
print(tmptransfostart)
|
|
1391 |
print(tmptransfostop)
|
|
1392 |
stop("Never happend condition 4.")
|
|
1393 |
}
|
|
1394 |
}
|
|
1395 |
if (tmptransfostart$V7 != tmptransfostop$V7) {
|
|
1396 |
print(tmptransfostart)
|
|
1397 |
print(tmptransfostop)
|
|
1398 |
stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
|
|
1399 |
}
|
|
1400 |
}
|
|
1401 |
# Deal with strand
|
|
1402 |
if (tmptransfostart$V8 == 1) {
|
|
1403 |
begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
|
|
1404 |
end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
|
|
1405 |
} else {
|
|
1406 |
begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
|
|
1407 |
end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
|
|
1408 |
}
|
|
1409 |
# Build returned roi
|
|
1410 |
roi$strain_ref = strain2
|
|
1411 |
if (roi$strain_ref == "BY") {
|
|
1412 |
roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
|
|
1413 |
} else {
|
|
1414 |
roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
|
|
1415 |
}
|
|
1416 |
roi$begin = begin_2
|
|
1417 |
roi$end = end_2
|
|
1418 |
if (sign(roi$end - roi$begin) == 0) {
|
|
1419 |
roi$length = 1
|
|
1420 |
} else {
|
|
1421 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
|
|
1422 |
}
|
|
1423 |
return(roi)
|
|
1424 |
}, ex=function(){
|
|
1425 |
# Define new translate_roi function...
|
|
1426 |
translate_roi = function(roi, strain2, config) {
|
|
1427 |
strain1 = roi$strain_ref
|
|
1428 |
if (strain1 == strain2) {
|
|
1429 |
return(roi)
|
|
1430 |
} else {
|
|
1431 |
stop("Here is my new translate_roi function...")
|
|
1432 |
}
|
|
1433 |
}
|
|
1434 |
# Binding it by uncomment follwing lines.
|
|
1435 |
# unlockBinding("translate_roi", as.environment("package:nm"))
|
|
1436 |
# unlockBinding("translate_roi", getNamespace("nm"))
|
|
1437 |
# assign("translate_roi", translate_roi, "package:nm")
|
|
1438 |
# assign("translate_roi", translate_roi, getNamespace("nm"))
|
|
1439 |
# lockBinding("translate_roi", getNamespace("nm"))
|
|
1440 |
# lockBinding("translate_roi", as.environment("package:nm"))
|
|
1441 |
})
|
|
1442 |
|
|
1443 |
|
|
1444 |
|
|
1445 |
|
|
1446 |
|
|
1447 |
|
|
1448 |
|
|
1449 |
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
|
|
1450 |
### CURs are regions that can be aligned between the genomes
|
|
1451 |
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
|
|
1452 |
min_cur_width = 200, ##<< The minimum width of a CUR
|
|
1453 |
config=NULL, ##<< GLOBAL config variable
|
|
1454 |
plot = FALSE ##<< Plot CURs or not
|
|
1455 |
) {
|
|
1456 |
get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
|
|
1457 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
|
|
1458 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
|
|
1459 |
# Filtering unagapped
|
|
1460 |
c2c = c2c[c2c$V6=="-",]
|
|
1461 |
# filtering some things (chr...)
|
|
1462 |
# c2c = c2c[c2c$V1 == "chrIV",]
|
|
1463 |
diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)]
|
|
1464 |
diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)]
|
|
1465 |
# Plot diffs to define a threshold (diff_allowed)
|
|
1466 |
# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
|
|
1467 |
# Filtering
|
|
1468 |
indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
|
|
1469 |
indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
|
|
1470 |
|
|
1471 |
rois = NULL
|
|
1472 |
for(i in 1:length(indexes_start)) {
|
|
1473 |
start = indexes_start[i]
|
|
1474 |
stop = indexes_stop[i]
|
|
1475 |
sub_c2c = c2c[start:stop,]
|
|
1476 |
if (strain1 == "BY") {
|
|
1477 |
chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
|
|
1478 |
} else {
|
|
1479 |
chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
|
|
1480 |
}
|
|
1481 |
roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
|
|
1482 |
roi[["length"]] = roi$end - roi$begin
|
|
1483 |
if (roi$length >= min_cur_width) {
|
|
1484 |
rois = dfadd(rois,roi)
|
|
1485 |
}
|
|
1486 |
if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
|
|
1487 |
print("*************** ERROR, non homogenous region! ********************")
|
|
1488 |
}
|
|
1489 |
# print(i)
|
|
1490 |
# print(roi)
|
|
1491 |
# print(sub_c2c)
|
|
1492 |
# print("________________________________________________________________")
|
|
1493 |
}
|
|
1494 |
if (plot) {
|
|
1495 |
print(paste(length(indexes_stop), "area of interest."))
|
|
1496 |
# Plot rois
|
|
1497 |
fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[[strain1]])
|
|
1498 |
genome = get_content(fasta_ref$filename, "fasta")
|
|
1499 |
plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
|
|
1500 |
for (name in names(genome)) {
|
|
1501 |
if (strain1 == "BY") {
|
|
1502 |
chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
|
|
1503 |
} else {
|
|
1504 |
chr_ref = name
|
|
1505 |
}
|
|
1506 |
y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
|
|
1507 |
lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
|
|
1508 |
text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)
|
|
1509 |
}
|
|
1510 |
col=1
|
|
1511 |
for (roi_index in 1:length(rois$chr)) {
|
|
1512 |
roi = rois[roi_index,]
|
|
1513 |
y_lev = as.integer(roi$chr) + 0.3
|
|
1514 |
lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
|
|
1515 |
text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
|
|
1516 |
col = col + 1
|
|
1517 |
}
|
|
1518 |
}
|
|
1519 |
return(rois)
|
|
1520 |
}
|
|
1521 |
rois = NULL
|
|
1522 |
rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
|
|
1523 |
rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
|
|
1524 |
for (roi_1_index in 1:length(rois_BY_RM[,1])) {
|
|
1525 |
roi_1 = rois_BY_RM[roi_1_index,]
|
|
1526 |
roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;
|
|
1527 |
# print(length(roi_2_candidates[,1]))
|
|
1528 |
if (length(roi_2_candidates[,1]) > 0) {
|
|
1529 |
for(roi_2_index in 1:length(roi_2_candidates[,1])) {
|
|
1530 |
roi_2 = roi_2_candidates[roi_2_index,]
|
|
1531 |
roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
|
|
1532 |
roi[["length"]] = roi$end - roi$begin + 1
|
|
1533 |
if (roi$length >= min_cur_width) {
|
|
1534 |
# if (length(rois[,1]) == 153) {
|
|
1535 |
# print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
|
|
1536 |
# print(roi_1)
|
|
1537 |
# print(roi_2)
|
|
1538 |
# print(roi)
|
|
1539 |
# }
|
|
1540 |
rois = dfadd(rois,roi)
|
|
1541 |
}
|
|
1542 |
}
|
|
1543 |
}
|
|
1544 |
}
|
|
1545 |
print(length(rois[,1]))
|
|
1546 |
print(sum(rois$length))
|
|
1547 |
rois_1st_round = rois
|
|
1548 |
rois_2nd_round = NULL
|
|
1549 |
rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
|
|
1550 |
for (roi_1_index in 1:length(rois_1st_round[,1])) {
|
|
1551 |
roi_1 = rois_1st_round[roi_1_index,]
|
|
1552 |
translated_roi_1 = translate_roi(roi_1, "RM", config)
|
|
1553 |
t_b = min(translated_roi_1$begin, translated_roi_1$end)
|
|
1554 |
t_e = max(translated_roi_1$begin, translated_roi_1$end)
|
|
1555 |
roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;
|
|
1556 |
if (length(roi_2_candidates[,1]) > 0) {
|
|
1557 |
for(roi_2_index in 1:length(roi_2_candidates[,1])) {
|
|
1558 |
roi_2 = roi_2_candidates[roi_2_index,]
|
|
1559 |
roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
|
|
1560 |
roi[["length"]] = roi$end - roi$begin + 1
|
|
1561 |
if (roi$length >= min_cur_width) {
|
|
1562 |
rois_2nd_round = dfadd(rois_2nd_round,roi)
|
|
1563 |
}
|
|
1564 |
}
|
|
1565 |
}
|
|
1566 |
}
|
|
1567 |
print(length(rois_2nd_round[,1]))
|
|
1568 |
print(sum(rois_2nd_round$length))
|
|
1569 |
rois = rois_2nd_round
|
|
1570 |
|
|
1571 |
rois_translator_round = list()
|
|
1572 |
for (roi_index in 1:length(rois[,1])) {
|
|
1573 |
roi = rois[roi_index,]
|
|
1574 |
BY_roi = translate_roi(roi, "BY", config)
|
|
1575 |
tmp_BY_roi = BY_roi
|
|
1576 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1577 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1578 |
BY_roi$length = abs(BY_roi$length)
|
|
1579 |
rois_translator_round = dfadd(rois_translator_round, BY_roi)
|
|
1580 |
}
|
|
1581 |
rois = rois_translator_round
|
|
1582 |
|
|
1583 |
rois_3rd_round = NULL
|
|
1584 |
for (roi_index in 1:length(rois[,1])) {
|
|
1585 |
# for (roi_index in 1:2) {
|
|
1586 |
current_roi = rois[roi_index,]
|
|
1587 |
# print(roi_index)
|
|
1588 |
|
|
1589 |
to_be_check_rois = dfadd(NULL, current_roi)
|
|
1590 |
NEED_RERUN = TRUE
|
|
1591 |
while (NEED_RERUN) {
|
|
1592 |
# print("RERUN"),
|
|
1593 |
NEED_RERUN = FALSE
|
|
1594 |
to_be_check_again = NULL
|
|
1595 |
for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
|
|
1596 |
# print(to_be_check_rois)
|
|
1597 |
to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
|
|
1598 |
combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
|
|
1599 |
for (combi in combis) {
|
|
1600 |
# print(combi)
|
|
1601 |
strain1 = combi[1]
|
|
1602 |
strain2 = combi[2]
|
|
1603 |
trans_roi = translate_roi(to_be_check_roi, strain1, config)
|
|
1604 |
lower_bound=min(trans_roi$begin, trans_roi$end)
|
|
1605 |
upper_bound=max(trans_roi$begin, trans_roi$end)
|
|
1606 |
|
|
1607 |
check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
|
|
1608 |
reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
|
|
1609 |
if (strain1 == strain2) {
|
|
1610 |
roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1
|
|
1611 |
return(roi)
|
|
1612 |
}
|
|
1613 |
# Launch c2c file
|
|
1614 |
if (reverse) {
|
|
1615 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
|
|
1616 |
} else {
|
|
1617 |
c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
|
|
1618 |
}
|
|
1619 |
c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
|
|
1620 |
# filtering it
|
|
1621 |
c2c = c2c[c2c$V6=="-",]
|
|
1622 |
# Reverse
|
|
1623 |
if (reverse) {
|
|
1624 |
tmp_col = c2c$V1
|
|
1625 |
c2c$V1 = c2c$V7
|
|
1626 |
c2c$V7 = tmp_col
|
|
1627 |
tmp_col = c2c$V2
|
|
1628 |
c2c$V2 = c2c$V9
|
|
1629 |
c2c$V9 = tmp_col
|
|
1630 |
tmp_col = c2c$V3
|
|
1631 |
c2c$V3 = c2c$V10
|
|
1632 |
c2c$V10 = tmp_col
|
|
1633 |
}
|
|
1634 |
|
|
1635 |
if (strain1 == "BY") {
|
|
1636 |
chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
|
|
1637 |
} else if (strain1 == "RM") {
|
|
1638 |
chro_1 = paste("supercontig_1.",chr,sep="")
|
|
1639 |
} else if (strain1 == "YJM") {
|
|
1640 |
chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
|
|
1641 |
}
|
|
1642 |
# print(chro_1)
|
|
1643 |
if (!is.null(lower_bound) & !is.null(upper_bound)) {
|
|
1644 |
if (reverse) {
|
|
1645 |
tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
|
|
1646 |
} else {
|
|
1647 |
tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
|
|
1648 |
}
|
|
1649 |
} else {
|
|
1650 |
tmp_c2c = c2c[c2c$V1 == chr,]
|
|
1651 |
}
|
|
1652 |
if (length(tmp_c2c[,1]) > 1) {
|
|
1653 |
pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
|
|
1654 |
# print(paste(i, "/", length(tmp_c2c[,1])))
|
|
1655 |
apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
|
|
1656 |
l1 = tmp_c2c[i,]
|
|
1657 |
b1 = min(l1$V2, l1$V3)
|
|
1658 |
e1 = max(l1$V2, l1$V3)
|
|
1659 |
l2 = tmp_c2c[j,]
|
|
1660 |
b2 = min(l2$V2, l2$V3)
|
|
1661 |
e2 = max(l2$V2, l2$V3)
|
|
1662 |
if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
|
|
1663 |
print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))
|
|
1664 |
pb = list(strain1, strain2, chr, b1, e1, b2, e2)
|
|
1665 |
pb
|
|
1666 |
} else {
|
|
1667 |
NULL
|
|
1668 |
}
|
|
1669 |
})
|
|
1670 |
})
|
|
1671 |
return(pbs)
|
|
1672 |
}
|
|
1673 |
|
|
1674 |
}, ex=function(){
|
|
1675 |
source("src/nucleo_miner/yeast_strain_conversion.R");
|
|
1676 |
pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
|
|
1677 |
pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
|
|
1678 |
pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
|
|
1679 |
pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
|
|
1680 |
pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
|
|
1681 |
pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
|
|
1682 |
})
|
|
1683 |
|
|
1684 |
|
|
1685 |
res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound)
|
|
1686 |
if (!is.null(res)) {
|
|
1687 |
df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)
|
|
1688 |
interval = df_res[1,]
|
|
1689 |
inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
|
|
1690 |
inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
|
|
1691 |
# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))
|
|
1692 |
new_roi1 = trans_roi
|
|
1693 |
new_roi2 = trans_roi
|
|
1694 |
new_roi1$begin = lower_bound
|
|
1695 |
new_roi1$end = inter_min - 1
|
|
1696 |
new_roi1$length = new_roi1$end - new_roi1$begin + 1
|
|
1697 |
new_roi2$begin = inter_max + 1
|
|
1698 |
new_roi2$end = upper_bound
|
|
1699 |
new_roi2$length = new_roi2$end - new_roi2$begin + 1
|
|
1700 |
if (new_roi1$length > min_cur_width) {
|
|
1701 |
BY_roi = translate_roi(new_roi1, "BY", config)
|
|
1702 |
tmp_BY_roi = BY_roi
|
|
1703 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1704 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1705 |
BY_roi$length = abs(BY_roi$length)
|
|
1706 |
to_be_check_again = dfadd(to_be_check_again, BY_roi)
|
|
1707 |
}
|
|
1708 |
if (new_roi2$length > min_cur_width) {
|
|
1709 |
BY_roi = translate_roi(new_roi2, "BY", config)
|
|
1710 |
tmp_BY_roi = BY_roi
|
|
1711 |
BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1712 |
BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
|
|
1713 |
BY_roi$length = abs(BY_roi$length)
|
|
1714 |
to_be_check_again = dfadd(to_be_check_again, BY_roi)
|
|
1715 |
}
|
|
1716 |
if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
|
|
1717 |
for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
|
|
1718 |
to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])
|
|
1719 |
}
|
|
1720 |
}
|
|
1721 |
NEED_RERUN = TRUE
|
|
1722 |
break
|
|
1723 |
}
|
|
1724 |
}
|
|
1725 |
if (NEED_RERUN) {
|
|
1726 |
to_be_check_rois = to_be_check_again
|
|
1727 |
break
|
|
1728 |
}
|
|
1729 |
}
|
|
1730 |
}
|
|
1731 |
|
|
1732 |
checked_rois = to_be_check_rois
|
|
1733 |
for (checked_roi_index in 1:length(checked_rois[,1])) {
|
|
1734 |
rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])
|
|
1735 |
}
|
|
1736 |
}
|
|
1737 |
|
|
1738 |
print(length(rois_3rd_round[,1]))
|
|
1739 |
print(sum(rois_3rd_round$length))
|
|
1740 |
rois = rois_3rd_round
|
|
1741 |
|
|
1742 |
|
|
1743 |
if (plot) {
|
|
1744 |
print(paste(length(rois$chr), "area of interest."))
|
|
1745 |
# Plot rois
|
|
1746 |
fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[["BY"]])
|
|
1747 |
genome = get_content(fasta_ref$filename, "fasta")
|
|
1748 |
plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
|
|
1749 |
for (name in names(genome)) {
|
|
1750 |
if (TRUE) {
|
|
1751 |
chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
|
|
1752 |
} else {
|
|
1753 |
chr_ref = name
|
|
1754 |
}
|
|
1755 |
y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
|
|
1756 |
lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
|
|
1757 |
text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)
|
|
1758 |
}
|
|
1759 |
col=1
|
|
1760 |
for (roi_index in 1:length(rois$chr)) {
|
|
1761 |
roi = rois[roi_index,]
|
|
1762 |
y_lev = as.integer(roi$chr) + 0.3
|
|
1763 |
lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
|
|
1764 |
text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
|
|
1765 |
col = col + 1
|
|
1766 |
}
|
|
1767 |
}
|
|
1768 |
return (rois)
|
|
1769 |
}
|
|
1770 |
|
|
1771 |
|
|
1772 |
|
|
1773 |
|
|
1774 |
|
|
1775 |
|
|
1776 |
|
|
1777 |
|
|
1778 |
|
|
1779 |
|
|
1780 |
|
|
1781 |
|
|
1782 |
|
|
1783 |
|
|
1784 |
|
|
1785 |
|
|
1786 |
|
|
1787 |
|
|
1788 |
|
|
1789 |
|
|
1790 |
|
|
1791 |
|
|
1792 |
|
|
1793 |
|
|
1794 |
|
|
1795 |
|
|
1796 |
|
|
1797 |
|
|
1798 |
|
|
1799 |
|
|
1800 |
|
|
1801 |
|
|
1802 |
|
|
1803 |
|
|
1804 |
|
|
1805 |
|
|
1806 |
|
|
1807 |
|
1191 |
1808 |
|
1192 |
1809 |
|
1193 |
1810 |
|
... | ... | |
1345 |
1962 |
plot_wp_nucs_4_nonmnase = FALSE, ##<< Plot (or not) clusters for non inputs samples.
|
1346 |
1963 |
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed.
|
1347 |
1964 |
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
|
1348 |
|
height = 10 ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
|
|
1965 |
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
|
|
1966 |
config=NULL ##<< GLOBAL config variable
|
1349 |
1967 |
){
|
1350 |
1968 |
returned_list = list()
|
1351 |
1969 |
# Computing global display parameters
|
... | ... | |
1513 |
2131 |
|
1514 |
2132 |
if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
|
1515 |
2133 |
if (is.null(aligned_inter_strain_nucs)) {
|
1516 |
|
aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]])[[1]]
|
|
2134 |
aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
|
1517 |
2135 |
}
|
1518 |
2136 |
if (plot_common_nucs) {
|
1519 |
2137 |
#Plot common wp nucs
|