Révision 1d833b97

b/doc/sphinx_doc/conf.py
50 50
# built documents.
51 51
#
52 52
# The short X.Y version.
53
version = '2.3'
53
version = '2.3.1'
54 54
# The full version, including alpha/beta/rc tags.
55
release = '2.3'
55
release = '2.3.1'
56 56

  
57 57
# The language for content autogenerated by Sphinx. Refer to documentation
58 58
# for a list of supported languages.
b/doc/sphinx_doc/rref.rst
1
Arabic to Roman pair list.
2
--------------------------
3

  
4
Description
5
~~~~~~~~~~~
6

  
7
Util to convert Arabicto Roman
8

  
9
Usage
10
~~~~~
11

  
12
::
13

  
14
    ARAB2ROM()
15

  
16
Author(s)
17
~~~~~~~~~
18

  
19
Florent Chuffart
20

  
21
R: False Discovery Rate
22

  
1 23
False Discovery Rate
2 24
--------------------
3 25

  
......
42 64

  
43 65
    print("example")
44 66

  
67
R: Roman to Arabic pair list.
68

  
69
Roman to Arabic pair list.
70
--------------------------
71

  
72
Description
73
~~~~~~~~~~~
74

  
75
Util to convert Roman to Arabic
76

  
77
Usage
78
~~~~~
79

  
80
::
81

  
82
    ROM2ARAB()
83

  
84
Author(s)
85
~~~~~~~~~
86

  
87
Florent Chuffart
88

  
45 89
R: Aggregate replicated sample's nucleosomes.
46 90

  
47 91
Aggregate replicated sample's nucleosomes.
......
141 185

  
142 186
    align_inter_strain_nucs(replicates, wp_nucs_strain_ref1 = NULL, 
143 187
        wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = -100, 
144
        ...)
188
        config = NULL, ...)
145 189

  
146 190
Arguments
147 191
~~~~~~~~~
......
168 212

  
169 213
LOD cut off.
170 214

  
215
``config``
216

  
217
GLOBAL config variable
218

  
171 219
``...``
172 220

  
173 221
A list of parameters that will be passed to
......
188 236

  
189 237
::
190 238

  
239

  
240
        # Define new translate_roi function...
241
        translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
242
          return(roi)
243
        }
244
        # Binding it by uncomment follwing lines.
245
        unlockBinding("translate_roi", as.environment("package:nucleominer"))
246
        unlockBinding("translate_roi", getNamespace("nucleominer"))
247
        assign("translate_roi", translate_roi, "package:nucleominer")
248
        assign("translate_roi", translate_roi, getNamespace("nucleominer"))
249
        lockBinding("translate_roi", getNamespace("nucleominer"))
250
        lockBinding("translate_roi", as.environment("package:nucleominer"))  
251

  
191 252
    # Dealing with a region of interest
192 253
    roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
193 254
    roi2 = translate_roi(roi, roi$strain_ref1)
......
252 313

  
253 314
Florent Chuffart
254 315

  
316
R: Compute Common Uninterrupted Regions (CUR)
317

  
318
Compute Common Uninterrupted Regions (CUR)
319
------------------------------------------
320

  
321
Description
322
~~~~~~~~~~~
323

  
324
CURs are regions that can be aligned between the genomes
325

  
326
Usage
327
~~~~~
328

  
329
::
330

  
331
    compute_inter_all_strain_curs(diff_allowed = 10, min_cur_width = 200, 
332
        config = NULL, plot = FALSE)
333

  
334
Arguments
335
~~~~~~~~~
336

  
337
``diff_allowed``
338

  
339
the maximum indel width allowe din a CUR
340

  
341
``min_cur_width``
342

  
343
The minimum width of a CUR
344

  
345
``config``
346

  
347
GLOBAL config variable
348

  
349
``plot``
350

  
351
Plot CURs or not
352

  
353
Author(s)
354
~~~~~~~~~
355

  
356
Florent Chuffart
357

  
255 358
R: Crop bound of regions according to region of interest bound
256 359

  
257 360
Crop bound of regions according to region of interest bound
......
268 371

  
269 372
::
270 373

  
271
    crop_fuzzy(tmp_fuzzy_nucs, roi, strain)
374
    crop_fuzzy(tmp_fuzzy_nucs, roi, strain, config = NULL)
272 375

  
273 376
Arguments
274 377
~~~~~~~~~
......
285 388

  
286 389
The strain to consider.
287 390

  
391
``config``
392

  
393
GLOBAL config variable
394

  
288 395
Author(s)
289 396
~~~~~~~~~
290 397

  
......
403 510

  
404 511
::
405 512

  
406
    fetch_mnase_replicates(strain, roi, all_samples, config, only_fetch = FALSE, 
407
        get_genome = FALSE, get_ouputs = TRUE)
513
    fetch_mnase_replicates(strain, roi, all_samples, config = NULL, 
514
        only_fetch = FALSE, get_genome = FALSE, get_ouputs = TRUE)
408 515

  
409 516
Arguments
410 517
~~~~~~~~~
......
729 836

  
730 837
::
731 838

  
732
    get_fuzzy(combi, roi, roi_index, strain_maps, common_nuc_results)
839
    get_fuzzy(combi, roi, roi_index, strain_maps, common_nuc_results, 
840
        config = NULL)
733 841

  
734 842
Arguments
735 843
~~~~~~~~~
......
754 862

  
755 863
Common wp nuc maps
756 864

  
865
``config``
866

  
867
GLOBAL config variable
868

  
757 869
Author(s)
758 870
~~~~~~~~~
759 871

  
......
889 1001
+---------------+---------------------------------------------------+
890 1002
| Author:       | Florent Chuffart                                  |
891 1003
+---------------+---------------------------------------------------+
892
| Version:      | 2.3                                               |
1004
| Version:      | 2.3.1                                             |
893 1005
+---------------+---------------------------------------------------+
894 1006
| License:      | CeCILL                                            |
895 1007
+---------------+---------------------------------------------------+
......
1110 1222

  
1111 1223
Florent Chuffart
1112 1224

  
1225
R: Switch a pairlist
1226

  
1227
Switch a pairlist
1228
-----------------
1229

  
1230
Description
1231
~~~~~~~~~~~
1232

  
1233
Take a pairlist key:value and return the switched pairlist value:key.
1234

  
1235
Usage
1236
~~~~~
1237

  
1238
::
1239

  
1240
    switch_pairlist(l)
1241

  
1242
Arguments
1243
~~~~~~~~~
1244

  
1245
``l``
1246

  
1247
The pairlist to switch.
1248

  
1249
Value
1250
~~~~~
1251

  
1252
The switched pairlist.
1253

  
1254
Author(s)
1255
~~~~~~~~~
1256

  
1257
Florent Chuffart
1258

  
1259
Examples
1260
~~~~~~~~
1261

  
1262
::
1263

  
1264
    l = list(key1 = "value1", key2 = "value2")
1265
    print(switch_pairlist(l))
1266

  
1113 1267
R: Translate a list of regions from a strain ref to another.
1114 1268

  
1115 1269
Translate a list of regions from a strain ref to another.
......
1125 1279

  
1126 1280
::
1127 1281

  
1128
    translate_regions(regions, combi, roi_index, roi)
1282
    translate_regions(regions, combi, roi_index, config = NULL, roi)
1129 1283

  
1130 1284
Arguments
1131 1285
~~~~~~~~~
......
1142 1296

  
1143 1297
The region of interest index.
1144 1298

  
1299
``config``
1300

  
1301
GLOBAL config variable
1302

  
1145 1303
``roi``
1146 1304

  
1147 1305
The region of interest.
......
1168 1326

  
1169 1327
::
1170 1328

  
1171
    translate_roi(roi, strain2, big_roi = NULL)
1329
    translate_roi(roi, strain2, config = NULL, big_roi = NULL)
1172 1330

  
1173 1331
Arguments
1174 1332
~~~~~~~~~
......
1181 1339

  
1182 1340
The strain in wich you want the genome region of interest.
1183 1341

  
1184
``big_roi``
1342
``config``
1185 1343

  
1186
A largest region than roi use to filter c2c if it is needed.
1344
GLOBAL config variable
1187 1345

  
1188
Value
1189
~~~~~
1346
``big_roi``
1190 1347

  
1191
The translated genome region of interest.
1348
A largest region than roi use to filter c2c if it is needed.
1192 1349

  
1193 1350
Author(s)
1194 1351
~~~~~~~~~
......
1201 1358
::
1202 1359

  
1203 1360
    # Define new translate_roi function...
1204
    translate_roi = function(roi, strain2) {
1361
    translate_roi = function(roi, strain2, config) {
1205 1362
        strain1 = roi$strain_ref
1206 1363
        if (strain1 == strain2) {
1207 1364
            return(roi)
......
1272 1429
        plot_wp_nucs = TRUE, plot_wp_nuc_model = TRUE, plot_common_nucs = TRUE, 
1273 1430
        plot_anovas = FALSE, plot_anova_boxes = FALSE, plot_wp_nucs_4_nonmnase = FALSE, 
1274 1431
        aggregated_intra_strain_nucs = NULL, aligned_inter_strain_nucs = NULL, 
1275
        height = 10)
1432
        height = 10, config = NULL)
1276 1433

  
1277 1434
Arguments
1278 1435
~~~~~~~~~
......
1354 1511
Number of reads in per million read for each sample, graphical parametre
1355 1512
for the y axis.
1356 1513

  
1514
``config``
1515

  
1516
GLOBAL config variable
1517

  
1357 1518
Author(s)
1358 1519
~~~~~~~~~
1359 1520

  
b/src/DESCRIPTION
1 1
Package: nucleominer
2 2
Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr>
3 3
Author: Florent Chuffart
4
Version: 2.3
4
Version: 2.3.1
5 5
License: CeCILL 
6 6
Title: nm
7 7
Depends: seqinr, plotrix, DESeq, cachecache
b/src/NAMESPACE
1
export(FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples)
1
export(FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_roi, fetch_mnase_replicates, substract_region, union_regions, remove_aligned_wp, translate_regions, extract_wp, crop_fuzzy, get_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, perform_anovas, watch_samples, compute_inter_all_strain_curs, switch_pairlist)
b/src/R/nucleominer.R
454 454
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
455 455
corr_thres=0.5, ##<< Correlation threshold.
456 456
lod_thres=-100, ##<< LOD cut off.
457
config=NULL, ##<< GLOBAL config variable
457 458
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.	
458 459
) {
459 460
	if (length(replicates) < 2) {
......
482 483
	if (is.null(wp_nucs_strain_ref2)) {
483 484
	  wp_nucs_strain_ref2 = aggregate_intra_strain_nucs(replicates[[2]], ...)[[1]]
484 485
  }
486
  # foo <<- wp_nucs_strain_ref1
487
  # print(apply(t(wp_nucs_strain_ref1), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
488
  # print(apply(t(wp_nucs_strain_ref2), 2, function(l){c(l[[1]]$lower_bound, l[[1]]$upper_bound, l[[1]]$wp)}))
485 489
	# dealing with matching_nas
486 490
	lws = c()
487 491
	ups = c()
......
500 504
			# Filtering on Well Positionned
501 505
			if (nuc_strain_ref1$wp) {			
502 506
				roi_strain_ref1 = list(name=paste("strain_chr_id_" , strain_ref1 , "_" , chr , "_" , "i" , "_", sep=""), begin=nuc_strain_ref1$lower_bound, end=nuc_strain_ref1$upper_bound, chr=chr, strain_ref = strain_ref1)
503
        # print(roi_strain_ref1)
504
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi) 
505
				if (!is.null(roi_strain_ref2)){
507
				roi_strain_ref2 = translate_roi(roi_strain_ref1, strain_ref2, big_roi, config) 
508
        if (!is.null(roi_strain_ref2)){
506 509
					# LOADING INTRA_STRAIN_NUCS_FILENAME_STRAIN_REF2 FILE(S) TO COMPUTE MATCHING_NAS (FILTER)
507 510
					lower_bound_roi_strain_ref2 = min(roi_strain_ref2$end,roi_strain_ref2$begin)
508 511
					upper_bound_roi_strain_ref2 = max(roi_strain_ref2$end,roi_strain_ref2$begin)
......
609 612
	}
610 613
### Returns a list of clusterized nucleosomes, and all computed lod scores.
611 614
}, ex=function(){	
615

  
616
    # Define new translate_roi function...
617
    translate_roi = function(roi, strain2, big_roi=NULL, config=NULL) {
618
      return(roi)
619
    }
620
    # Binding it by uncomment follwing lines.
621
    unlockBinding("translate_roi", as.environment("package:nucleominer"))
622
    unlockBinding("translate_roi", getNamespace("nucleominer"))
623
    assign("translate_roi", translate_roi, "package:nucleominer")
624
    assign("translate_roi", translate_roi, getNamespace("nucleominer"))
625
    lockBinding("translate_roi", getNamespace("nucleominer"))
626
    lockBinding("translate_roi", as.environment("package:nucleominer"))  
627

  
612 628
	# Dealing with a region of interest
613 629
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301), strain_ref1 = "STRAINREF1")
614 630
	roi2 = translate_roi(roi, roi$strain_ref1)
......
639 655
	print(align_inter_strain_nucs(replicates))
640 656
})
641 657

  
642
translate_roi = structure(function(# Translate coords of a genome region.
643
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
644
roi, ##<< Original genome region of interest.
645
strain2, ##<< The strain in wich you want the genome region of interest.
646
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
647
) {
648
### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion.
649
	strain1 = roi$strain_ref
650
	if (strain1 == strain2 | strain2 == "strain_ex2") {
651
		return(roi)
652
	} else {
653
	  stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a 
654
		look in the translate_roi example.")		
655
	}
656
### The translated genome region of interest. 
657
}, ex=function(){
658
	# Define new translate_roi function...
659
	translate_roi = function(roi, strain2) {
660
		strain1 = roi$strain_ref
661
		if (strain1 == strain2) {
662
			return(roi)
663
		} else {
664
		  stop("Here is my new translate_roi function...")		
665
		}	
666
	}
667
	# Binding it by uncomment follwing lines.
668
	# unlockBinding("translate_roi", as.environment("package:nm"))
669
	# unlockBinding("translate_roi", getNamespace("nm"))
670
	# assign("translate_roi", translate_roi, "package:nm")
671
	# assign("translate_roi", translate_roi, getNamespace("nm"))
672
	# lockBinding("translate_roi", getNamespace("nm"))
673
	# lockBinding("translate_roi", as.environment("package:nm"))	
674
})
675

  
676

  
677 658

  
678 659

  
679 660

  
......
704 685
strain, ##<< The strain we want mnase replicatesList of replicates. Each replicates is a vector of sample ids.
705 686
roi, ##<< Region of interest.
706 687
all_samples, ##<< Global list of samples.
707
config, ##<< GLOBAL config variable
688
config=NULL, ##<< GLOBAL config variable
708 689
only_fetch=FALSE, ##<< If TRUE, only fetch and not filtering. It is used tio load sample files into memory before forking.
709 690
get_genome=FALSE, ##<< If TRUE, load corresponding genome sequence.
710 691
get_ouputs=TRUE##<< If TRUE, get also ouput corresponding TF output files.
......
713 694
  samples_ids = unique(all_samples[all_samples$marker == "Mnase_Seq" & all_samples$strain == strain,]$id)
714 695
	for (i in samples_ids) {
715 696
		sample = as.list(all_samples[all_samples$id==i,])
716
    sample$roi = translate_roi(roi, sample$strain)
697
    sample$roi = translate_roi(roi, sample$strain, config)
717 698
		if (get_genome) {
718 699
			# Get Genome
719
      spl = function(l) {
720
      	ret = list()
721
      	for (name in names(l)) {
722
      		ret[[as.character(l[[name]])]] = name
723
      	}
724
      	ret
725
      }
726
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[spl(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
700
      sample$roi$genome = get_content(config$FASTA_REFERENCE_GENOME_FILES[[sample$strain]], "fasta")[[switch_pairlist(config$FASTA_INDEXES[[sample$strain]])[[sample$roi$chr]]]][sample$roi$begin:sample$roi$end]		
727 701
		}
728 702
		# Get inputs
729 703
		sample$inputs = get_content(paste(config$ALIGN_DIR, "/TF/sample_", i, "_TF.txt", sep=""), "table", stringsAsFactors=FALSE) 
......
856 830
regions, ##<< Regions to be translated. 
857 831
combi, ##<< Combination of strains.
858 832
roi_index, ##<< The region of interest index.
833
config=NULL, ##<< GLOBAL config variable
859 834
roi ##<< The region of interest.
860 835
) {
861 836
  tr_regions = apply(t(1:length(regions[,1])), 2, function(i) {
862 837
    tmp_regions_ref2 = list(name="foo", begin=regions[i,]$lower_bound, end=regions[i,]$upper_bound, chr=as.character(regions[i,]$chr), strain_ref = combi[2])
863
    big_roi =  translate_roi(roi, tmp_regions_ref2$strain_ref)
838
    big_roi =  translate_roi(roi, tmp_regions_ref2$strain_ref, config)
864 839
    tmp_min = min(big_roi$begin, big_roi$end)
865 840
    tmp_max = max(big_roi$begin, big_roi$end)
866 841
    big_roi$begin = tmp_min
867 842
    big_roi$end = tmp_max
868
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], big_roi) 
843
    trs_tmp_regions_ref2 = translate_roi(tmp_regions_ref2, combi[1], config, big_roi) 
869 844
    data.frame(list(chr=trs_tmp_regions_ref2$chr, lower_bound=min(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), upper_bound=max(trs_tmp_regions_ref2$begin, trs_tmp_regions_ref2$end), roi_index=roi_index))
870 845
    })
871 846
  tr_regions = do.call("rbind", tr_regions)      
......
902 877
### The fucntion is no more necessary since we remove "big_roi" bug in translate_roi function.
903 878
tmp_fuzzy_nucs, ##<< the regiuons to be croped.
904 879
roi, ##<< The region of interest.
905
strain ##<< The strain to consider.
880
strain, ##<< The strain to consider.
881
config=NULL ##<< GLOBAL config variable
906 882
) {
907
  tr_roi = translate_roi(roi, strain)
883
  tr_roi = translate_roi(roi, strain, config)
908 884
  tr_roi_begin = min(tr_roi$begin, tr_roi$end)
909 885
  tr_roi_end = max(tr_roi$begin, tr_roi$end)
910 886
  if (length(tmp_fuzzy_nucs[tmp_fuzzy_nucs$lower_bound < tr_roi_begin,1]) > 0) {
......
930 906
roi, ##<< The region of interest.
931 907
roi_index, ##<< The region of interest index.
932 908
strain_maps, ##<< Nuc maps.
933
common_nuc_results ##<< Common wp nuc maps
909
common_nuc_results, ##<< Common wp nuc maps
910
config=NULL ##<< GLOBAL config variable
934 911
) {
935 912
  print(roi_index)
936 913
  PLOT = FALSE
......
939 916

  
940 917
  print(paste("Dealing with fuzzy from", combi[1]))
941 918
  tmp_fuzzy_nucs_1 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[1])
942
  tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1])
919
  tmp_fuzzy_nucs_1 = crop_fuzzy(tmp_fuzzy_nucs_1, roi, combi[1], config)
943 920
  if (length(tmp_fuzzy_nucs_1[,1]) == 0) {return(NULL)}
944 921
  agg_fuzzy_1 = union_regions(tmp_fuzzy_nucs_1)    
945 922
  if (PLOT) for (i in 1:length(agg_fuzzy_1[,1])) {
......
950 927
  tmp_fuzzy_nucs_2 = remove_aligned_wp(strain_maps, roi_index, tmp_common_nucs, combi[2])
951 928
  if (length(tmp_fuzzy_nucs_2[,1]) == 0) {return(NULL)}
952 929
  agg_fuzzy_2 = union_regions(tmp_fuzzy_nucs_2)      
953
  agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2])
930
  agg_fuzzy_2 = crop_fuzzy(agg_fuzzy_2, roi, combi[2], config)
954 931
  tr_agg_fuzzy_2 = translate_regions(agg_fuzzy_2, combi, roi_index, roi)
955
  tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2])
932
  tr_agg_fuzzy_2 = crop_fuzzy(tr_agg_fuzzy_2, roi, combi[2], config)
956 933
  # tr_agg_fuzzy_2 = union_regions(tr_agg_fuzzy_2)
957 934
  if (PLOT) for (i in 1:length(tr_agg_fuzzy_2[,1])) {
958 935
    lines(c(tr_agg_fuzzy_2[i,]$lower_bound, tr_agg_fuzzy_2[i,]$upper_bound), c(+3.3,+3.3), col=2)
......
1188 1165

  
1189 1166

  
1190 1167

  
1168
ROM2ARAB = function(# Roman to Arabic pair list.
1169
### Util to convert Roman to Arabic
1170
){list(
1171
  "I" = 1,
1172
  "II" = 2,
1173
  "III" = 3,
1174
  "IV" = 4,
1175
  "V" = 5,
1176
  "VI" = 6,
1177
  "VII" = 7,
1178
  "VIII" = 8,
1179
  "IX" = 9,
1180
  "X" = 10,
1181
  "XI" = 11,
1182
  "XII" = 12,
1183
  "XIII" = 13,
1184
  "XIV" = 14,
1185
  "XV" = 15,
1186
  "XVI" = 16,
1187
  "XVII" = 17,
1188
  "XVIII" = 18,
1189
  "XIX" = 19,
1190
  "XX" = 20
1191
)}
1192

  
1193
switch_pairlist = structure(function(# Switch a pairlist
1194
### Take a pairlist key:value and return the switched pairlist value:key.
1195
l ##<< The pairlist to switch. 
1196
) {
1197
	ret = list()
1198
	for (name in names(l)) {
1199
		ret[[as.character(l[[name]])]] = name
1200
	}
1201
	ret
1202
### The switched pairlist.
1203
}, ex=function(){
1204
	l = list(key1 = "value1", key2 = "value2")
1205
	print(switch_pairlist(l))
1206
})
1207

  
1208
ARAB2ROM = function(# Arabic to Roman pair list.
1209
### Util to convert Arabicto Roman
1210
){switch_pairlist(ROM2ARAB())}
1211

  
1212

  
1213
# translate_roi = structure(function(# Translate coords of a genome region.
1214
# ### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1215
# roi, ##<< Original genome region of interest.
1216
# strain2, ##<< The strain in wich you want the genome region of interest.
1217
# big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1218
# ) {
1219
# ### This function translate a genome region of interest from a strain coord system to an other. This is a minimal fucntion that will be called by \emph{align_inter_strain_nucs} and its exemnple, you need to overwrite it by your own fucntion.
1220
#   strain1 = roi$strain_ref
1221
#   if (strain1 == strain2 | strain2 == "strain_ex2") {
1222
#     return(roi)
1223
#   } else {
1224
#     stop("ERROR, you need to overwrite your own function to convert convert strain coords. To binf the new function, have a 
1225
#     look in the translate_roi example.")    
1226
#   }
1227
# ### The translated genome region of interest. 
1228
# }, ex=function(){
1229
#   # Define new translate_roi function...
1230
#   translate_roi = function(roi, strain2) {
1231
#     strain1 = roi$strain_ref
1232
#     if (strain1 == strain2) {
1233
#       return(roi)
1234
#     } else {
1235
#       stop("Here is my new translate_roi function...")    
1236
#     }  
1237
#   }
1238
#   # Binding it by uncomment follwing lines.
1239
#   # unlockBinding("translate_roi", as.environment("package:nm"))
1240
#   # unlockBinding("translate_roi", getNamespace("nm"))
1241
#   # assign("translate_roi", translate_roi, "package:nm")
1242
#   # assign("translate_roi", translate_roi, getNamespace("nm"))
1243
#   # lockBinding("translate_roi", getNamespace("nm"))
1244
#   # lockBinding("translate_roi", as.environment("package:nm"))  
1245
# })
1246
# 
1247
# 
1248

  
1249

  
1250
translate_roi = structure(function(# Translate coords of a genome region.
1251
### This function is used in the examples, usualy you have to define your own translation function and overwrite this one using \emph{unlockBinding} features. Please, refer to the example.
1252
roi, ##<< Original genome region of interest.
1253
strain2, ##<< The strain in wich you want the genome region of interest.
1254
config=NULL, ##<< GLOBAL config variable
1255
big_roi=NULL ##<< A largest region than roi use to filter c2c if it is needed.
1256
) {
1257
	strain1 = roi$strain_ref
1258
  reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1259
	if (strain1 == strain2) {
1260
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1261
		return(roi)
1262
	}
1263
	# Launch c2c file
1264
	if (reverse) {
1265
		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1266
	} else {
1267
		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1268
	}
1269
	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1270
	# filtering it
1271
  c2c = c2c[c2c$V6=="-",]
1272
	# Reverse
1273
	if (reverse) {
1274
		tmp_col = c2c$V1
1275
		c2c$V1 = c2c$V7
1276
		c2c$V7 = tmp_col
1277
		tmp_col = c2c$V2
1278
		c2c$V2 = c2c$V9
1279
		c2c$V9 = tmp_col
1280
		tmp_col = c2c$V3
1281
		c2c$V3 = c2c$V10
1282
		c2c$V10 = tmp_col
1283
	}
1284
	# Restrict c2c to big_roi
1285
	# if (FALSE) {
1286
	if (!is.null(big_roi)) {
1287
		if (roi$strain_ref == big_roi$strain_ref) {
1288
			if (strain1 == "BY") {
1289
				big_chro_1 = paste("chr", ARAB2ROM()[[big_roi$chr]], sep="")
1290
			} else if (strain1 == "RM") {			
1291
			  big_chro_1 = paste("supercontig_1.",big_roi$chr,sep="")
1292
			} else if (strain1 == "YJM") {			
1293
			  big_chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[big_roi$chr]]
1294
			}
1295
			big_begin_1 = big_roi$begin
1296
		  big_end_1 = big_roi$end
1297
			c2c = c2c[c2c$V1==big_chro_1,]
1298
      if (length(c2c[c2c$V3<big_begin_1, 1] > 0)) {c2c[c2c$V3 < big_begin_1,c("V2", "V3") ] = big_begin_1}
1299
      if (length(c2c[c2c$V2>big_end_1, 1] > 0)) {c2c[c2c$V2 > big_end_1, c("V2", "V3")] = big_end_1}
1300
      c2c = c2c[c2c$V2 - c2c$V3 != 0,]
1301
		} else {
1302
			stop("ERROR, big_roi and roi not in the same strain_ref")
1303
		}
1304
	}
1305
  #	Convert initial roi$chr into c2c format
1306
	if (strain1 == "BY") {
1307
		chro_1 = paste("chr", ARAB2ROM()[[roi$chr]], sep="")
1308
	} else if (strain1 == "RM") {			
1309
	  chro_1 = paste("supercontig_1.",roi$chr,sep="")
1310
	} else if (strain1 == "YJM") {			
1311
	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[roi$chr]]
1312
	}
1313
	begin_1 = roi$begin
1314
  end_1 = roi$end
1315
  # Computing equivalent strain_2 alignment coordinates	
1316
	if (reverse) {
1317
  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1318
    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1319
	} else {
1320
		tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1321
	  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1322
	}
1323
	# Never happend conditions ...	
1324
	{
1325
		if (length(tmptransfostart$V8) == 0) {
1326
			# begin_1 is between to lines: shift begin_1 to the start of 2nd line.
1327
			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V2>=begin_1,]
1328
			begin_1 = sort(tmp_c2c$V2)[1]
1329
			if (reverse) {
1330
		  	tmptransfostart = c2c[c2c$V1==chro_1 & ((c2c$V3>=begin_1 & c2c$V2<=begin_1 & c2c$V8==1) | (c2c$V2>=begin_1 & c2c$V3<=begin_1 & c2c$V8==-1)),]
1331
			} else {
1332
				tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=begin_1 & c2c$V2<=begin_1,]
1333
			}
1334
			if (length(tmptransfostart$V8) == 0) {
1335
				if (!is.null(big_roi)) {
1336
					return(NULL)
1337
					tmptransfostart = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$begin & c2c$V2<=big_roi$begin,]
1338
				} else { 
1339
					# return(NULL)
1340
					# print(c2c[c2c$V1==chro_1 & c2c$V2<=end_1 & c2c$V3>=begin_1,])
1341
					# print(c2c[c2c$V1==chro_1,])
1342
					print(tmptransfostart)
1343
					print(tmptransfostop)
1344
					stop("Never happend condition 1.")					
1345
				}
1346
			}
1347
		} 
1348
		if (length(tmptransfostop$V8) == 0) {
1349
			# end_1 is between to lines: shift end_1 to the end of 2nd line.
1350
			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3<=end_1,]
1351
			end_1 = sort(tmp_c2c$V3)[length(tmp_c2c$V2)]
1352
			if (reverse) {
1353
		    tmptransfostop = c2c[c2c$V1==chro_1 &  ((c2c$V3>=end_1   & c2c$V2<=end_1   & c2c$V8==1) | (c2c$V2>=end_1   & c2c$V3<=end_1   & c2c$V8==-1)),]
1354
			} else {
1355
			  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=end_1 & c2c$V2<=end_1,]
1356
			}
1357
			if (length(tmptransfostop$V8) == 0) {
1358
				if (!is.null(big_roi)) {
1359
					return(NULL)
1360
				  tmptransfostop = c2c[c2c$V1==chro_1 & c2c$V3>=big_roi$end & c2c$V2<=big_roi$end,]
1361
				} else {
1362
					# return(NULL)
1363
					print(c2c[c2c$V1==chro_1,])
1364
					print(tmptransfostart)
1365
					print(tmptransfostop)
1366
					stop("Never happend condition 2.")
1367
				} 
1368
			} 
1369
		}
1370
		if (length(tmptransfostart$V8) != 1) {
1371
			# tmptransfostart = tmptransfostart[1,]
1372
			# print("many start")
1373
			# print(c2c[c2c$V1==chro_1,])
1374
			tmptransfostart = tmptransfostart[tmptransfostart$V1==chro_1 & tmptransfostart$V3>=begin_1 & tmptransfostart$V2==begin_1,]
1375
			if (length(tmptransfostart$V8) != 1) {
1376
				# return(NULL)
1377
				print(tmptransfostart)
1378
				print(tmptransfostop)
1379
  			stop("Never happend condition 3.")
1380
			}
1381
		}
1382
		if (length(tmptransfostop$V8) != 1) {
1383
			# tmptransfostop = tmptransfostop[length(tmptransfostop$V8),]
1384
			# print("many stop")
1385
			# print(tmptransfostop)
1386
			# print(roi)
1387
		  tmptransfostop = tmptransfostop[tmptransfostop$V1==chro_1 & tmptransfostop$V3==end_1 & tmptransfostop$V2<=end_1,]
1388
			if (length(tmptransfostop$V8) != 1) {
1389
				# return(NULL)
1390
				print(tmptransfostart)
1391
				print(tmptransfostop)
1392
  			stop("Never happend condition 4.")
1393
			}
1394
		}		
1395
		if (tmptransfostart$V7 != tmptransfostop$V7) {
1396
			print(tmptransfostart)
1397
			print(tmptransfostop)		
1398
 			stop("Problem with genome region of interest of strain 1. \nIt is translated over many contigs into strain 2 ref. \nSorry, but you have to redefine your region of interest.")
1399
		}
1400
	} 
1401
  # Deal with strand
1402
  if (tmptransfostart$V8 == 1) {
1403
    begin_2 = tmptransfostart$V9 + (begin_1 - tmptransfostart$V2)
1404
    end_2 = tmptransfostop$V9 + (end_1 - tmptransfostop$V2)
1405
  } else {
1406
    begin_2 = tmptransfostart$V9 - (begin_1 - tmptransfostart$V2)
1407
    end_2 = tmptransfostop$V9 - (end_1 - tmptransfostop$V2)
1408
  }
1409
	# Build returned roi
1410
	roi$strain_ref = strain2
1411
	if (roi$strain_ref == "BY") {
1412
		roi$chr = ROM2ARAB()[[substr(tmptransfostart$V7, 4, 12)]]
1413
	} else {
1414
		roi$chr = config$FASTA_INDEXES[[strain2]][[tmptransfostart$V7]]
1415
	}
1416
  roi$begin = begin_2
1417
  roi$end = end_2
1418
	if (sign(roi$end - roi$begin) == 0) {
1419
		roi$length = 1			
1420
	} else {		
1421
		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1422
	}
1423
  return(roi)  
1424
}, ex=function(){
1425
	# Define new translate_roi function...
1426
	translate_roi = function(roi, strain2, config) {
1427
		strain1 = roi$strain_ref
1428
		if (strain1 == strain2) {
1429
			return(roi)
1430
		} else {
1431
		  stop("Here is my new translate_roi function...")		
1432
		}	
1433
	}
1434
	# Binding it by uncomment follwing lines.
1435
	# unlockBinding("translate_roi", as.environment("package:nm"))
1436
	# unlockBinding("translate_roi", getNamespace("nm"))
1437
	# assign("translate_roi", translate_roi, "package:nm")
1438
	# assign("translate_roi", translate_roi, getNamespace("nm"))
1439
	# lockBinding("translate_roi", getNamespace("nm"))
1440
	# lockBinding("translate_roi", as.environment("package:nm"))	
1441
})
1442

  
1443

  
1444

  
1445

  
1446

  
1447

  
1448

  
1449
compute_inter_all_strain_curs = function (# Compute Common Uninterrupted Regions (CUR)
1450
### CURs are regions that can be aligned between the genomes 
1451
diff_allowed = 10, ##<< the maximum indel width allowe din a CUR
1452
min_cur_width = 200, ##<< The minimum width of a CUR
1453
config=NULL, ##<< GLOBAL config variable
1454
plot = FALSE ##<< Plot CURs or not
1455
) {
1456
  get_inter_strain_rois = function(strain1, strain2, diff_allowed = 10, min_cur_width = 200, plot=FALSE) {
1457
  	c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1458
  	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)
1459
    # Filtering unagapped
1460
    c2c = c2c[c2c$V6=="-",]
1461
    # filtering some things (chr...)
1462
    # c2c = c2c[c2c$V1 == "chrIV",]
1463
    diff = c2c$V2[-1] - c2c$V3[-length(c2c$V2)] 
1464
    diff2 = c2c$V9[-1] - c2c$V10[-length(c2c$V2)] 
1465
  	# Plot diffs to define a threshold (diff_allowed)
1466
  	# hist(abs(c(diff2, diff)),breaks=c(0:2000, 200000000000), xlim=c(0,100))
1467
    # Filtering
1468
  	indexes_stop = which(abs(diff) > diff_allowed | abs(diff2) > diff_allowed)
1469
  	indexes_start = c(1, indexes_stop[-length(indexes_stop)] + rep(1, length(indexes_stop) -1))
1470

  
1471
    rois = NULL	
1472
  	for(i in 1:length(indexes_start)) {
1473
  		start = indexes_start[i]
1474
  		stop = indexes_stop[i]
1475
  		sub_c2c = c2c[start:stop,]
1476
  		if (strain1 == "BY") {
1477
  			chr = ROM2ARAB()[[substr(sub_c2c[1,]$V1,4,10)]]
1478
  		} else {			
1479
  			chr = config$FASTA_INDEXES[[strain1]][[sub_c2c[1,]$V1]]
1480
  		}
1481
  		roi = list(chr=chr, begin=sub_c2c[1,]$V2, end=sub_c2c[length(sub_c2c$V1),]$V3, strain_ref=strain1)
1482
  		roi[["length"]] = roi$end - roi$begin
1483
  		if (roi$length >= min_cur_width) {
1484
        rois = dfadd(rois,roi)
1485
  	  }
1486
  		if (length(unique(sub_c2c[,c(1,7,8)])[,2]) != 1) {
1487
  			print("*************** ERROR, non homogenous region! ********************")
1488
  		}
1489
  		# print(i)
1490
  		# print(roi)
1491
  		# print(sub_c2c)
1492
  		# print("________________________________________________________________")
1493
  	}
1494
  	if (plot) {
1495
  		print(paste(length(indexes_stop), "area of interest."))
1496
  	  # Plot rois
1497
  	  fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[[strain1]])
1498
  	  genome = get_content(fasta_ref$filename, "fasta")   
1499
  		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1500
  		for (name in names(genome)) {
1501
  			if (strain1 == "BY") {
1502
  				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[[strain1]][[name]]]], sep="")
1503
  			} else {			
1504
  				chr_ref = name
1505
  			}
1506
  			y_lev = as.integer(config$FASTA_INDEXES[[strain1]][[name]])
1507
  			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1508
  			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1509
  		}
1510
  	  col=1
1511
  	  for (roi_index in 1:length(rois$chr)) {
1512
  			roi = rois[roi_index,]	
1513
  			y_lev = as.integer(roi$chr) + 0.3
1514
  			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1515
  			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1516
  	  	col = col + 1
1517
  	  }	
1518
  	}
1519
  	return(rois)
1520
  }
1521
	rois = NULL	
1522
	rois_BY_RM = get_inter_strain_rois("BY", "RM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1523
	rois_BY_YJM = get_inter_strain_rois("BY", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1524
	for (roi_1_index in 1:length(rois_BY_RM[,1])) {
1525
		roi_1 = rois_BY_RM[roi_1_index,]
1526
		roi_2_candidates = rois_BY_YJM[rois_BY_YJM$chr== roi_1$chr & rois_BY_YJM$begin <= roi_1$end & rois_BY_YJM$end >= roi_1$begin , ] ;  
1527
		# print(length(roi_2_candidates[,1]))
1528
		if (length(roi_2_candidates[,1]) > 0) {
1529
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1530
				roi_2 = roi_2_candidates[roi_2_index,]
1531
				roi = list(chr=roi_1$chr, begin=max(roi_1$begin, roi_2$begin), end=min(roi_1$end, roi_2$end), strain_ref="BY")
1532
				roi[["length"]] = roi$end - roi$begin + 1
1533
				if (roi$length >= min_cur_width) {
1534
					# if (length(rois[,1]) == 153) {
1535
					# 	print(paste(length(rois[,1]), roi_1_index, roi_2_index ))
1536
					# 	print(roi_1)
1537
					# 	print(roi_2)
1538
					# 	print(roi)
1539
					# }
1540
			    rois = dfadd(rois,roi)
1541
			  }			
1542
			}
1543
		}
1544
	}
1545
	print(length(rois[,1]))
1546
	print(sum(rois$length))
1547
	rois_1st_round = rois
1548
	rois_2nd_round = NULL	
1549
	rois_RM_YJM = get_inter_strain_rois("RM", "YJM", min_cur_width = min_cur_width, diff_allowed = diff_allowed)
1550
	for (roi_1_index in 1:length(rois_1st_round[,1])) {
1551
		roi_1 = rois_1st_round[roi_1_index,]
1552
		translated_roi_1 = translate_roi(roi_1, "RM", config)
1553
		t_b = min(translated_roi_1$begin, translated_roi_1$end)
1554
		t_e = max(translated_roi_1$begin, translated_roi_1$end)
1555
		roi_2_candidates = rois_RM_YJM[rois_RM_YJM$chr== translated_roi_1$chr & rois_RM_YJM$begin <= t_e & rois_RM_YJM$end >= t_b , ] ;  
1556
		if (length(roi_2_candidates[,1]) > 0) {
1557
			for(roi_2_index in 1:length(roi_2_candidates[,1])) {
1558
				roi_2 = roi_2_candidates[roi_2_index,]
1559
				roi = list(chr=translated_roi_1$chr, begin=max(t_b, roi_2$begin), end=min(t_e, roi_2$end), strain_ref="RM")
1560
				roi[["length"]] = roi$end - roi$begin + 1
1561
				if (roi$length >= min_cur_width) {
1562
			    rois_2nd_round = dfadd(rois_2nd_round,roi)
1563
			  }			
1564
			}
1565
		}
1566
	}
1567
	print(length(rois_2nd_round[,1]))
1568
	print(sum(rois_2nd_round$length))
1569
	rois = rois_2nd_round
1570

  
1571
	rois_translator_round = list()	
1572
	for (roi_index in 1:length(rois[,1])) {
1573
		roi = rois[roi_index,]
1574
		BY_roi  = translate_roi(roi, "BY", config)
1575
		tmp_BY_roi = BY_roi 
1576
		BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1577
		BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1578
		BY_roi$length = abs(BY_roi$length)
1579
		rois_translator_round = dfadd(rois_translator_round, BY_roi)
1580
	}
1581
	rois = rois_translator_round
1582

  
1583
	rois_3rd_round = NULL	
1584
	for (roi_index in 1:length(rois[,1])) {
1585
	# for (roi_index in 1:2) {
1586
		current_roi = rois[roi_index,]
1587
		# print(roi_index)
1588
	
1589
	  to_be_check_rois = dfadd(NULL, current_roi)
1590
		NEED_RERUN = TRUE
1591
		while (NEED_RERUN) {
1592
			# print("RERUN"),
1593
			NEED_RERUN = FALSE
1594
			to_be_check_again = NULL
1595
			for (to_be_check_roi_index in 1:length(to_be_check_rois[,1])) {
1596
				# print(to_be_check_rois)
1597
				to_be_check_roi = to_be_check_rois[to_be_check_roi_index,]
1598
		    combis = list(c("BY", "RM"), c("BY", "YJM"), c("RM", "YJM"), c("RM", "BY"), c("YJM", "BY"), c("YJM", "RM"))
1599
			  for (combi in combis) {
1600
					# print(combi)
1601
			    strain1 = combi[1]
1602
		      strain2 = combi[2]
1603
					trans_roi = translate_roi(to_be_check_roi, strain1, config)
1604
					lower_bound=min(trans_roi$begin, trans_roi$end)
1605
					upper_bound=max(trans_roi$begin, trans_roi$end)			
1606
          
1607
          check_overlaping = structure(function(strain1 = "BY", strain2 = "RM", chr = NULL, lower_bound=NULL, upper_bound=NULL) {
1608
            reverse = (strain1=="RM" & strain2=="BY") | strain1=="YJM"
1609
          	if (strain1 == strain2) {
1610
          		roi$length = roi$end - roi$begin + sign(roi$end - roi$begin) * 1	
1611
          		return(roi)
1612
          	}
1613
          	# Launch c2c file
1614
          	if (reverse) {
1615
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain2, "-", strain1, sep="")]])
1616
          	} else {
1617
          		c2c_file = list(filename=config$C2C_FILES[[paste(strain1, "-", strain2, sep="")]])
1618
          	}
1619
          	c2c = get_content(c2c_file$filename, "table", stringsAsFactors=FALSE)	
1620
          	# filtering it
1621
            c2c = c2c[c2c$V6=="-",]
1622
          	# Reverse
1623
          	if (reverse) {
1624
          		tmp_col = c2c$V1
1625
          		c2c$V1 = c2c$V7
1626
          		c2c$V7 = tmp_col
1627
          		tmp_col = c2c$V2
1628
          		c2c$V2 = c2c$V9
1629
          		c2c$V9 = tmp_col
1630
          		tmp_col = c2c$V3
1631
          		c2c$V3 = c2c$V10
1632
          		c2c$V10 = tmp_col
1633
          	}
1634

  
1635
          	if (strain1 == "BY") {
1636
          		chro_1 = paste("chr", ARAB2ROM()[[chr]], sep="")
1637
          	} else if (strain1 == "RM") {			
1638
          	  chro_1 = paste("supercontig_1.",chr,sep="")
1639
          	} else if (strain1 == "YJM") {			
1640
          	  chro_1 = switch_pairlist(config$FASTA_INDEXES$YJM)[[chr]]
1641
          	}
1642
          	# print(chro_1)
1643
          	if (!is.null(lower_bound) & !is.null(upper_bound)) {
1644
              if (reverse) {
1645
          	  	tmp_c2c = c2c[c2c$V1==chro_1 & ((c2c$V3>=lower_bound & c2c$V2<=upper_bound & c2c$V8==1) | (c2c$V2>=lower_bound & c2c$V3<=upper_bound & c2c$V8==-1)),]
1646
          		} else {
1647
          			tmp_c2c = c2c[c2c$V1==chro_1 & c2c$V3>=lower_bound & c2c$V2<=upper_bound,]
1648
          		}
1649
          	} else {
1650
            	tmp_c2c = c2c[c2c$V1 == chr,]
1651
          	}
1652
          	if (length(tmp_c2c[,1]) > 1) {
1653
          		pbs = apply(t(1:(length(tmp_c2c[,1]) - 1)), 2, function(i){
1654
          			# print(paste(i, "/", length(tmp_c2c[,1])))
1655
          			apply(t((i+1):length(tmp_c2c[,1])), 2, function(j){
1656
          				l1 = tmp_c2c[i,] 
1657
          				b1 = min(l1$V2, l1$V3)
1658
          				e1 = max(l1$V2, l1$V3)
1659
          				l2 = tmp_c2c[j,]
1660
          				b2 = min(l2$V2, l2$V3)
1661
          				e2 = max(l2$V2, l2$V3)
1662
          				if ((e1>=b2 & b1<=e2) | (e2>=b1 & b2<=e1)) {
1663
          					print(paste("WARNING! Overlaping", " (", strain1, ",", strain2, ") chr: ",chr, " [", b1, ",", e1, "] [", b2, ",", e2, "]", sep=""))				
1664
          					pb = list(strain1, strain2, chr, b1, e1, b2, e2)					
1665
          					pb
1666
          				} else {
1667
          					NULL
1668
          				}
1669
          			})
1670
          		})
1671
          		return(pbs)
1672
          	}
1673

  
1674
          }, ex=function(){
1675
          	source("src/nucleo_miner/yeast_strain_conversion.R"); 
1676
          	pbs1 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=TRUE)
1677
          	pbs3 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=TRUE)
1678
          	pbs5 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=TRUE)
1679
          	pbs2 = check_overlaping(strain1 = "BY", strain2 = "RM", dest=FALSE)
1680
          	pbs4 = check_overlaping(strain1 = "BY", strain2 = "YJM", dest=FALSE)
1681
          	pbs6 = check_overlaping(strain1 = "RM", strain2 = "YJM", dest=FALSE)
1682
          })
1683
          
1684

  
1685
					res = check_overlaping(strain1 = strain1, strain2 = strain2, chr = trans_roi$chr, lower_bound=lower_bound, upper_bound=upper_bound) 
1686
					if (!is.null(res)) {
1687
						df_res = data.frame(matrix(unlist(res), ncol = 7, byrow=TRUE), stringsAsFactors=FALSE)		
1688
						interval = df_res[1,]
1689
						inter_min = as.numeric(max( min(interval$X4, interval$X5), min(interval$X6, interval$X7)))
1690
						inter_max = as.numeric(min( max(interval$X4, interval$X5), max(interval$X6, interval$X7)))
1691
						# print(paste("SPLIT ROI", roi_index, "for", combi[1], combi[2]))			
1692
						new_roi1 = trans_roi
1693
						new_roi2 = trans_roi
1694
						new_roi1$begin = lower_bound
1695
						new_roi1$end = inter_min - 1
1696
						new_roi1$length = new_roi1$end - new_roi1$begin + 1
1697
						new_roi2$begin = inter_max + 1
1698
						new_roi2$end = upper_bound
1699
						new_roi2$length = new_roi2$end - new_roi2$begin + 1
1700
						if (new_roi1$length > min_cur_width) {
1701
							BY_roi  = translate_roi(new_roi1, "BY", config)
1702
							tmp_BY_roi = BY_roi
1703
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1704
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1705
							BY_roi$length = abs(BY_roi$length)
1706
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1707
						}
1708
						if (new_roi2$length > min_cur_width) {
1709
							BY_roi  = translate_roi(new_roi2, "BY", config)
1710
							tmp_BY_roi = BY_roi
1711
							BY_roi$begin = min(tmp_BY_roi$begin, tmp_BY_roi$end)
1712
							BY_roi$end = max(tmp_BY_roi$begin, tmp_BY_roi$end)
1713
							BY_roi$length = abs(BY_roi$length)
1714
							to_be_check_again = dfadd(to_be_check_again, BY_roi)	
1715
						}
1716
						if (to_be_check_roi_index < length(to_be_check_rois[,1])) {
1717
							for (i in (to_be_check_roi_index + 1):length(to_be_check_rois[,1])) {
1718
								to_be_check_again = dfadd(to_be_check_again, to_be_check_rois[i,])								
1719
							}
1720
						}
1721
						NEED_RERUN = TRUE
1722
						break
1723
					}
1724
				}
1725
				if (NEED_RERUN) {
1726
					to_be_check_rois = to_be_check_again
1727
					break
1728
				}
1729
			}
1730
		}
1731
		
1732
		checked_rois = to_be_check_rois
1733
		for (checked_roi_index in 1:length(checked_rois[,1])) {
1734
			rois_3rd_round = dfadd(rois_3rd_round, checked_rois[checked_roi_index,])	
1735
		}
1736
	}
1737

  
1738
	print(length(rois_3rd_round[,1]))
1739
	print(sum(rois_3rd_round$length))
1740
	rois = rois_3rd_round
1741
	
1742
	
1743
	if (plot) {
1744
		print(paste(length(rois$chr), "area of interest."))
1745
	  # Plot rois
1746
	  fasta_ref = list(filename=config$FASTA_REFERENCE_GENOME_FILES[["BY"]])
1747
	  genome = get_content(fasta_ref$filename, "fasta")   
1748
		plot(0,0, ylim=(c(1,length(genome))), xlim = c(0, max(apply(t(genome), 2, function(chr){length(unlist(chr))}))))
1749
		for (name in names(genome)) {
1750
			if (TRUE) {
1751
				chr_ref = paste("chr", ARAB2ROM()[[config$FASTA_INDEXES[["BY"]][[name]]]], sep="")
1752
			} else {			
1753
				chr_ref = name
1754
			}
1755
			y_lev = as.integer(config$FASTA_INDEXES[["BY"]][[name]])
1756
			lines(c(0,length(unlist(genome[[name]]))), c(y_lev,y_lev))
1757
			text( length(unlist(genome[[name]]))/2, y_lev, labels = chr_ref)	
1758
		}
1759
	  col=1
1760
	  for (roi_index in 1:length(rois$chr)) {
1761
			roi = rois[roi_index,]	
1762
			y_lev = as.integer(roi$chr) + 0.3
1763
			lines(c(roi$begin,roi$end), c(y_lev,y_lev), col=col)
1764
			text( mean(c(roi$begin,roi$end)), y_lev, labels = roi_index)
1765
	  	col = col + 1
1766
	  }	
1767
	}
1768
	return (rois)
1769
}
1770

  
1771

  
1772

  
1773

  
1774

  
1775

  
1776

  
1777

  
1778

  
1779

  
1780

  
1781

  
1782

  
1783

  
1784

  
1785

  
1786

  
1787

  
1788

  
1789

  
1790

  
1791

  
1792

  
1793

  
1794

  
1795

  
1796

  
1797

  
1798

  
1799

  
1800

  
1801

  
1802

  
1803

  
1804

  
1805

  
1806

  
1807

  
1191 1808

  
1192 1809

  
1193 1810

  
......
1345 1962
plot_wp_nucs_4_nonmnase = FALSE,  ##<< Plot (or not) clusters for non inputs samples.
1346 1963
aggregated_intra_strain_nucs = NULL, ##<< list of aggregated intra strain nucs. If NULL, it will be computed. 
1347 1964
aligned_inter_strain_nucs = NULL, ##<< list of aligned inter strain nucs. If NULL, it will be computed.
1348
height = 10 ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1965
height = 10, ##<< Number of reads in per million read for each sample, graphical parametre for the y axis.
1966
config=NULL ##<< GLOBAL config variable
1349 1967
){
1350 1968
  returned_list = list()
1351 1969
  # Computing global display parameters
......
1513 2131
  
1514 2132
	if (plot_common_nucs | plot_anovas | plot_anova_boxes) {
1515 2133
		if (is.null(aligned_inter_strain_nucs)) {
1516
			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]])[[1]]
2134
			aligned_inter_strain_nucs = align_inter_strain_nucs(replicates, replicates_wp_nucs[[1]], replicates_wp_nucs[[2]], config=config)[[1]]
1517 2135
		}
1518 2136
		if (plot_common_nucs) {
1519 2137
      #Plot common wp nucs

Formats disponibles : Unified diff