Révision 7e2d37e1

b/doc/sphinx_doc/build/text/ref.txt
122 122
position of their center. Adajacent nucleosomes are compared two by
123 123
two. Comparison is based on a log likelihood ratio score. The issue of
124 124
comparison is adjacents nucleosomes merge or separation. Finally the
125
function returns a list of clusters and all computed *lod_scores*.
125
function returns a list of clusters and all computed *llr_scores*.
126 126
Each cluster ows an attribute *wp* for "well positionned". This
127 127
attribute is set as *TRUE* if the cluster is composed of exactly one
128 128
nucleosomes of each sample.
......
131 131
Usage
132 132
~~~~~
133 133

  
134
   aggregate_intra_strain_nucs(samples, lod_thres = 20, coord_max = 2e+07)
134
   aggregate_intra_strain_nucs(samples, llr_thres = 20, coord_max = 2e+07)
135 135

  
136 136

  
137 137
Arguments
......
143 143
marker=..., strain=..., roi=..., inputs=..., outputs=...)* with *roi =
144 144
list(name=..., begin=..., end=..., chr=..., genome=...)*.
145 145

  
146
"lod_thres"
146
"llr_thres"
147 147

  
148 148
Log likelihood ration threshold.
149 149

  
......
155 155
Value
156 156
~~~~~
157 157

  
158
Returns a list of clusterized nucleosomes, and all computed lod
158
Returns a list of clusterized nucleosomes, and all computed llr
159 159
scores.
160 160

  
161 161

  
......
210 210
~~~~~
211 211

  
212 212
   align_inter_strain_nucs(replicates, wp_nucs_strain_ref1 = NULL,
213
       wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = 100,
213
       wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, llr_thres = 100,
214 214
       config = NULL, ...)
215 215

  
216 216

  
......
235 235

  
236 236
Correlation threshold.
237 237

  
238
"lod_thres"
238
"llr_thres"
239 239

  
240 240
LOD cut off.
241 241

  
......
252 252
Value
253 253
~~~~~
254 254

  
255
Returns a list of clusterized nucleosomes, and all computed lod
255
Returns a list of clusterized nucleosomes, and all computed llr
256 256
scores.
257 257

  
258 258

  
......
1272 1272
Description
1273 1273
~~~~~~~~~~~
1274 1274

  
1275
Compute the likelihood log of two set of value from two models Vs. a
1276
unique model.
1275
Compute the log likelihood ratio of two or more set of value.
1277 1276

  
1278 1277

  
1279 1278
Usage
1280 1279
~~~~~
1281 1280

  
1282
   lod_score_vecs(x, y)
1281
   llr_score_nvecs(xs)
1283 1282

  
1284 1283

  
1285 1284
Arguments
1286 1285
~~~~~~~~~
1287 1286

  
1288
"x"
1289

  
1290
First vector.
1291

  
1292
"y"
1287
"xs"
1293 1288

  
1294
Second vector.
1289
list of vectors.
1295 1290

  
1296 1291

  
1297 1292
Value
1298 1293
~~~~~
1299 1294

  
1300
Returns the likelihood ratio.
1295
Returns the log likelihood ratio.
1301 1296

  
1302 1297

  
1303 1298
Author(s)
......
1320 1315
   lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
1321 1316
   lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
1322 1317
   lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
1323
   lod_score_vecs(x1,x2)
1318
   llr_score_nvecs(list(x1,x2))
1324 1319

  
1325 1320
R: nm
1326 1321

  
......
1346 1341
+-----------------+-----------------------------------------------------+
1347 1342
| Author:         | Florent Chuffart                                    |
1348 1343
+-----------------+-----------------------------------------------------+
1349
| Version:        | 2.3.41                                              |
1344
| Version:        | 2.3.42                                              |
1350 1345
+-----------------+-----------------------------------------------------+
1351 1346
| License:        | CeCILL                                              |
1352 1347
+-----------------+-----------------------------------------------------+
b/doc/sphinx_doc/conf.py
50 50
# built documents.
51 51
#
52 52
# The short X.Y version.
53
version = '2.3.41'
53
version = '2.3.42'
54 54
# The full version, including alpha/beta/rc tags.
55
release = '2.3.41'
55
release = '2.3.42'
56 56

  
57 57
# The language for content autogenerated by Sphinx. Refer to documentation
58 58
# for a list of supported languages.
b/doc/sphinx_doc/rref.rst
100 100
of their center. Adajacent nucleosomes are compared two by two.
101 101
Comparison is based on a log likelihood ratio score. The issue of
102 102
comparison is adjacents nucleosomes merge or separation. Finally the
103
function returns a list of clusters and all computed *lod\_scores*. Each
103
function returns a list of clusters and all computed *llr\_scores*. Each
104 104
cluster ows an attribute *wp* for "well positionned". This attribute is
105 105
set as *TRUE* if the cluster is composed of exactly one nucleosomes of
106 106
each sample.
......
110 110

  
111 111
::
112 112

  
113
    aggregate_intra_strain_nucs(samples, lod_thres = 20, coord_max = 2e+07)
113
    aggregate_intra_strain_nucs(samples, llr_thres = 20, coord_max = 2e+07)
114 114

  
115 115
Arguments
116 116
~~~~~~~~~
......
121 121
marker=..., strain=..., roi=..., inputs=..., outputs=...)* with *roi =
122 122
list(name=..., begin=..., end=..., chr=..., genome=...)*.
123 123

  
124
``lod_thres``
124
``llr_thres``
125 125

  
126 126
Log likelihood ration threshold.
127 127

  
......
132 132
Value
133 133
~~~~~
134 134

  
135
Returns a list of clusterized nucleosomes, and all computed lod scores.
135
Returns a list of clusterized nucleosomes, and all computed llr scores.
136 136

  
137 137
Author(s)
138 138
~~~~~~~~~
......
184 184
::
185 185

  
186 186
    align_inter_strain_nucs(replicates, wp_nucs_strain_ref1 = NULL, 
187
        wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, lod_thres = 100, 
187
        wp_nucs_strain_ref2 = NULL, corr_thres = 0.5, llr_thres = 100, 
188 188
        config = NULL, ...)
189 189

  
190 190
Arguments
......
208 208

  
209 209
Correlation threshold.
210 210

  
211
``lod_thres``
211
``llr_thres``
212 212

  
213 213
LOD cut off.
214 214

  
......
224 224
Value
225 225
~~~~~
226 226

  
227
Returns a list of clusterized nucleosomes, and all computed lod scores.
227
Returns a list of clusterized nucleosomes, and all computed llr scores.
228 228

  
229 229
Author(s)
230 230
~~~~~~~~~
......
1182 1182
Description
1183 1183
~~~~~~~~~~~
1184 1184

  
1185
Compute the likelihood log of two set of value from two models Vs. a
1186
unique model.
1185
Compute the log likelihood ratio of two or more set of value.
1187 1186

  
1188 1187
Usage
1189 1188
~~~~~
1190 1189

  
1191 1190
::
1192 1191

  
1193
    lod_score_vecs(x, y)
1192
    llr_score_nvecs(xs)
1194 1193

  
1195 1194
Arguments
1196 1195
~~~~~~~~~
1197 1196

  
1198
``x``
1199

  
1200
First vector.
1201

  
1202
``y``
1197
``xs``
1203 1198

  
1204
Second vector.
1199
list of vectors.
1205 1200

  
1206 1201
Value
1207 1202
~~~~~
1208 1203

  
1209
Returns the likelihood ratio.
1204
Returns the log likelihood ratio.
1210 1205

  
1211 1206
Author(s)
1212 1207
~~~~~~~~~
......
1229 1224
    lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
1230 1225
    lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
1231 1226
    lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
1232
    lod_score_vecs(x1,x2)
1227
    llr_score_nvecs(list(x1,x2))
1233 1228

  
1234 1229
R: nm
1235 1230

  
......
1252 1247
+---------------+---------------------------------------------------+
1253 1248
| Author:       | Florent Chuffart                                  |
1254 1249
+---------------+---------------------------------------------------+
1255
| Version:      | 2.3.41                                            |
1250
| Version:      | 2.3.42                                            |
1256 1251
+---------------+---------------------------------------------------+
1257 1252
| License:      | CeCILL                                            |
1258 1253
+---------------+---------------------------------------------------+
b/src/DESCRIPTION
1 1
Package: nucleominer
2 2
Maintainer: Florent Chuffart <florent.chuffart@ens-lyon.fr>
3 3
Author: Florent Chuffart
4
Version: 2.3.41
4
Version: 2.3.42
5 5
License: CeCILL 
6 6
Title: nm
7 7
Depends: seqinr, plotrix, DESeq, cachecache
b/src/NAMESPACE
1
export(flat_aggregated_intra_strain_nucs, FDR, lod_score_vecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_cur, fetch_mnase_replicates, substract_region, union_regions, collapse_regions, translate_regions, crop_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB, get_intra_strain_fuzzy, get_unrs)
1
export(flat_aggregated_intra_strain_nucs, FDR, llr_score_nvecs, dfadd, filter_tf_inputs, filter_tf_outputs, sign_from_strand, flat_reads, get_comp_strand, aggregate_intra_strain_nucs, align_inter_strain_nucs, translate_cur, fetch_mnase_replicates, substract_region, union_regions, collapse_regions, translate_regions, crop_fuzzy, get_all_reads, get_design, plot_dist_samples, analyse_design, get_sneps, watch_samples, compute_inter_all_strain_curs, switch_pairlist, build_replicates, ARAB2ROM, ROM2ARAB, get_intra_strain_fuzzy, get_unrs)
b/src/R/nucleominer.R
106 106

  
107 107

  
108 108

  
109
lod_score_vecs = structure(function # Likelihood ratio
110
### Compute the likelihood log of two set of value from two models Vs. a unique model.
109
llr_score_nvecs = structure(function # Likelihood ratio
110
### Compute the log likelihood ratio of two or more set of value.
111 111
(
112
x ,##<< First vector.
113
y ##<< Second vector.
112
  xs ##<< list of vectors.
114 113
) {
115
	if (length(x) <=1 | length(y) <= 1) {
116
		return(NA)
117
	}
118
  meanX = mean(x)
119
  sdX = sd(x)
120
  meanY = mean(y)
121
  sdY = sd(y)
122
  meanXY = mean(c(x,y))
123
  sdXY = sd(c(x,y))
124
  llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
125
  llY = sum(log(dnorm(y,mean=meanY,sd=sdY)))
126
  llXY = sum(log(dnorm(c(x,y),mean=meanXY,sd=sdXY)))
127
  ratio = llX + llY - llXY
114
  l = length(xs)
115
  if (l < 1) {
116
    return(NA)
117
  }
118
  if (l == 1) {
119
    return(1)
120
  }
121
  sumllX = 0
122
  for (i in 1:l) {
123
    x = xs[[i]]
124
  	if (length(x) <= 1) {
125
  		return(NA)
126
  	}
127
    meanX = mean(x)
128
    sdX = sd(x)
129
    llX = sum(log(dnorm(x,mean=meanX,sd=sdX)))
130
    sumllX = sumllX + llX
131
  }
132
  meanXglo = mean(unlist(xs))
133
  sdXglo = sd(unlist(xs))
134
  llXYZ = sum(log(dnorm(unlist(xs),mean=meanXglo,sd=sdXglo)))
135
  ratio = sumllX - llXYZ
128 136
  return(ratio)
129
### Returns the likelihood ratio.
137
### Returns the log likelihood ratio.
130 138
}, ex=function(){
131 139
  # LOD score for 2 set of values
132 140
  mean1=5; sd1=2; card2 = 250
......
139 147
  lines(min:max,dnorm(min:max,mean1,sd1)*card1,col=2)
140 148
  lines(min:max,dnorm(min:max,mean2,sd2)*card2,col=3)
141 149
  lines(min:max,dnorm(min:max,mean(c(x1,x2)),sd(c(x1,x2)))*card2,col=4)
142
  lod_score_vecs(x1,x2)
150
  llr_score_nvecs(list(x1,x2))
143 151
 })
144 152

  
145 153
dfadd = structure(function# Adding list to a dataframe.
......
207 215
  }
208 216
  tf_outputs$lower_bound = tf_outputs$center - tf_outputs$width/2
209 217
  tf_outputs$upper_bound = tf_outputs$center + tf_outputs$width/2
210
  tf_outputs = tf_outputs[tf_outputs$correlation >= corr_thres,]
211
  tf_outputs = tf_outputs[order(tf_outputs$correlation,decreasing=TRUE),]
218
  tf_outputs = tf_outputs[tf_outputs$correlation.score >= corr_thres,]
219
  tf_outputs = tf_outputs[order(tf_outputs$correlation.score, decreasing=TRUE),]
212 220
  i = 1
213 221
  while (i <= length(tf_outputs[,1])) {
214
    lb = tf_outputs[i,]$low
215
    ub = tf_outputs[i,]$up
216
    tf_outputs = tf_outputs[!(tf_outputs$low <= (ub-ol_bp) & tf_outputs$up > ub) & !(tf_outputs$up >= (lb+ol_bp) & tf_outputs$low < lb),]
222
    lb = tf_outputs[i,]$lower_bound
223
    ub = tf_outputs[i,]$upper_bound
224
    tf_outputs = tf_outputs[!(tf_outputs$lower_bound <= (ub-ol_bp) & tf_outputs$upper_bound > ub) & !(tf_outputs$upper_bound >= (lb+ol_bp) & tf_outputs$lower_bound < lb),]
217 225
    i = i+1
218 226
  }
219 227
  return(tf_outputs)
......
265 273

  
266 274

  
267 275
aggregate_intra_strain_nucs = structure(function(# Aggregate replicated sample's nucleosomes.
268
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{lod_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
276
### This function aggregates nucleosome for replicated samples. It uses TemplateFilter ouput of each sample as replicate. Each sample owns a set of nucleosomes computed using TemplateFilter and ordered by the position of their center. Adajacent nucleosomes are compared two by two. Comparison is based on a log likelihood ratio score. The issue of comparison is adjacents nucleosomes merge or separation. Finally the function returns a list of clusters and all computed \emph{llr_scores}. Each cluster ows an attribute \emph{wp} for "well positionned". This attribute is set as \emph{TRUE} if the cluster is composed of exactly one nucleosomes of each sample.
269 277
samples, ##<< A list of samples. Each sample is a list like \emph{sample = list(id=..., marker=..., strain=..., roi=..., inputs=..., outputs=...)} with \emph{roi = list(name=..., begin=...,  end=..., chr=..., genome=...)}.
270
lod_thres=20, ##<< Log likelihood ration threshold.
278
llr_thres=20, ##<< Log likelihood ration threshold.
271 279
coord_max=20000000 ##<< A too big value to be a coord for a nucleosome lower bound.
272 280
){
273 281
	end_of_tracks = function(tracks) {
......
301 309
		return(clusters)
302 310
	}
303 311
	strain = samples[[1]]$strain
304
	lod_scores = c()
312
	llr_scores = c()
305 313
  min_nuc_center = min(samples[[1]]$roi$begin, samples[[1]]$roi$end)
306 314
	max_nuc_center = max(samples[[1]]$roi$begin, samples[[1]]$roi$end)
307 315
  # compute clusters
......
311 319
  indexes = c()
312 320
  track_readers = c()
313 321
  current_nuc = NULL
314
	lod_score = lod_thres + 1
322
	llr_score = llr_thres + 1
315 323
  # Read nucs from TF outputs
316 324
  tf_outs = list()
317 325
	i = 1
......
356 364
    new_upper_bound = new_nuc$upper_bound
357 365

  
358 366
    if (!is.null(current_nuc)) {
359
			lod_score = lod_score_vecs(current_nuc$original_reads,new_nuc$original_reads)
360
			lod_scores = c(lod_scores,lod_score)
367
			llr_score = llr_score_nvecs(list(current_nuc$original_reads,new_nuc$original_reads))
368
			llr_scores = c(llr_scores,llr_score)
361 369
		}
362
		# print(paste(lod_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
363
		if (is.na(lod_score)) {
364
			lod_score = lod_thres + 1
370
		# print(paste(llr_score, length(current_nuc$original_reads), length(new_nuc$original_reads), sep=" "))
371
		if (is.na(llr_score)) {
372
			llr_score = llr_thres + 1
365 373
		}
366
		# Store lod_score
367
		new_nuc$lod_score = lod_score
368
	  if (lod_score < lod_thres) {
374
		# Store llr_score
375
		new_nuc$llr_score = llr_score
376
	  if (llr_score < llr_thres) {
369 377
      # aggregate to current cluster
370 378
      #   update bound
371 379
      if (new_nuc$upper_bound > new_cluster$upper_bound) {
......
417 425
    # store old cluster
418 426
    clusters = store_cluster(clusters, new_cluster, nb_nucs_in_cluster,nuc_from_track,length(tf_outs),min_nuc_center, max_nuc_center)
419 427
  }
420
	return(list(clusters, lod_scores))
421
### Returns a list of clusterized nucleosomes, and all computed lod scores.
428
	return(list(clusters, llr_scores))
429
### Returns a list of clusterized nucleosomes, and all computed llr scores.
422 430
}, ex=function(){
423 431
	# Dealing with a region of interest
424 432
	roi =list(name="example", begin=1000,  end=1300, chr="1", genome=rep("A",301))
......
470 478
			tmp_nuc_as_list[["nb_reads"]] = length(all_original_reads)
471 479
			tmp_nuc_as_list[["nb_nucs"]] = length(tmp_nuc$nucs)
472 480
			if (tmp_nuc$wp) {
473
				tmp_nuc_as_list[["lod_1"]] = signif(tmp_nuc$nucs[[2]]$lod_score,5)
474
				tmp_nuc_as_list[["lod_2"]] = signif(tmp_nuc$nucs[[3]]$lod_score,5)
481
				tmp_nuc_as_list[["llr_1"]] = signif(tmp_nuc$nucs[[2]]$llr_score,5)
482
				tmp_nuc_as_list[["llr_2"]] = signif(tmp_nuc$nucs[[3]]$llr_score,5)
475 483
			} else {
476
				tmp_nuc_as_list[["lod_1"]] = NA
477
				tmp_nuc_as_list[["lod_2"]] = NA
484
				tmp_nuc_as_list[["llr_1"]] = NA
485
				tmp_nuc_as_list[["llr_2"]] = NA
478 486
			}
479 487
      return(tmp_nuc_as_list)
480 488
    })
......
490 498
wp_nucs_strain_ref1=NULL, ##<< List of aggregates nucleosome for strain 1. If it's null this list will be computed.
491 499
wp_nucs_strain_ref2=NULL, ##<< List of aggregates nucleosome for strain 2. If it's null this list will be computed.
492 500
corr_thres=0.5, ##<< Correlation threshold.
493
lod_thres=100, ##<< LOD cut off.
501
llr_thres=100, ##<< LOD cut off.
494 502
config=NULL, ##<< GLOBAL config variable
495 503
... ##<< A list of parameters that will be passed to \emph{aggregate_intra_strain_nucs} if needed.
496 504
) {
......
500 508
		print("WARNING, align_inter_strain_nucs will use 2 first sets of replicates as inputs.")
501 509
	}
502 510
	common_nuc = NULL
503
	lod_scores = c()
511
	llr_scores = c()
504 512
	chr = replicates[[1]][[1]]$roi$chr
505 513
  min_nuc_center = min(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
506 514
	max_nuc_center = max(replicates[[1]][[1]]$roi$begin, replicates[[1]][[1]]$roi$end)
......
571 579
								# tranlation of reads into strain 2 coords
572 580
								diff = ((roi_strain_ref1$begin + roi_strain_ref1$end) - (roi_strain_ref2$begin + roi_strain_ref2$end)) / 2
573 581
								reads_strain_ref1 = reads_strain_ref1 - rep(diff, length(reads_strain_ref1))
574
								lod_score = lod_score_vecs(reads_strain_ref1, reads_strain_ref2)
575
								lod_scores = c(lod_scores, lod_score)
582
								llr_score = llr_score_nvecs(list(reads_strain_ref1, reads_strain_ref2))
583
								llr_scores = c(llr_scores, llr_score)
576 584
								# Filtering on LOD Score
577
								if (lod_score < lod_thres) {
585
								if (llr_score < llr_thres) {
578 586
									tmp_nuc = list()
579 587
									# strain_ref1
580 588
									tmp_nuc[[paste("chr_", strain_ref1, sep="")]] = chr
......
599 607
									# tmp_nuc[[paste("corr2_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[2]]$corr,5)
600 608
									# tmp_nuc[[paste("corr3_", strain_ref2, sep="")]] = signif(nuc_strain_ref2$nucs[[3]]$corr,5)
601 609
									# common
602
									tmp_nuc[["lod_score"]] = signif(lod_score,5)
610
									tmp_nuc[["llr_score"]] = signif(llr_score,5)
603 611
									# print(tmp_nuc)
604 612
									common_nuc = dfadd(common_nuc, tmp_nuc)
605 613
								}
......
640 648
			common_nuc = common_nuc[-to_remove_list,]
641 649
		}
642 650

  
643
		return(list(common_nuc, lod_scores))
651
		return(list(common_nuc, llr_scores))
644 652
	} else {
645 653
		print("WARNING, no nucs for strain_ref1.")
646 654
		return(NULL)
647 655
	}
648
### Returns a list of clusterized nucleosomes, and all computed lod scores.
656
### Returns a list of clusterized nucleosomes, and all computed llr scores.
649 657
}, ex=function(){
650 658

  
651 659
    # Define new translate_cur function...
......
2028 2036
        tmp_x_next = tmp_x[-1]
2029 2037
        need_shift = apply(t(tmp_x_next - tmp_x_prev), 2, function(delta){ delta < 50})
2030 2038
        tmp_x_inter = (tmp_x_prev + tmp_x_next) / 2 + tmp_track_inter * need_shift
2031
        tmp_lod_inter =signif(unlist(tf_nucs$lod_score)[-1], 2)
2039
        tmp_llr_inter =signif(unlist(tf_nucs$llr_score)[-1], 2)
2032 2040
        new_tmp_x = c()
2033 2041
        new_tmp_y = c()
2034 2042
        index_odd = 1:length(tmp_x) * 2 - 1
......
2046 2054
        } else {
2047 2055
          pos = config$LEGEND_LOD_POS
2048 2056
        }
2049
        col_lod = sapply(tmp_lod_inter, function(lod){if (lod < 20 ) return("green") else return("red")})
2050
        text(tmp_x_inter, tmp_y_inter, tmp_lod_inter, cex=1.5, pos=pos, col=col_lod) 
2057
        col_llr = sapply(tmp_llr_inter, function(llr){if (llr < 20 ) return("green") else return("red")})
2058
        text(tmp_x_inter, tmp_y_inter, tmp_llr_inter, cex=1.5, pos=pos, col=col_llr) 
2051 2059
        
2052 2060
      }
2053 2061

  

Formats disponibles : Unified diff