Statistics
| Revision:

root / tmp / org.txm.statsengine.r.core.win32 / res / win32 / library / BH / include / boost / accumulators / statistics / weighted_p_square_quantile.hpp @ 2486

History | View | Annotate | Download (10.5 kB)

1
///////////////////////////////////////////////////////////////////////////////
2
// weighted_p_square_quantile.hpp
3
//
4
//  Copyright 2005 Daniel Egloff. Distributed under the Boost
5
//  Software License, Version 1.0. (See accompanying file
6
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7

    
8
#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
9
#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
10

    
11
#include <cmath>
12
#include <functional>
13
#include <boost/array.hpp>
14
#include <boost/parameter/keyword.hpp>
15
#include <boost/mpl/placeholders.hpp>
16
#include <boost/type_traits/is_same.hpp>
17
#include <boost/accumulators/framework/accumulator_base.hpp>
18
#include <boost/accumulators/framework/extractor.hpp>
19
#include <boost/accumulators/numeric/functional.hpp>
20
#include <boost/accumulators/framework/parameters/sample.hpp>
21
#include <boost/accumulators/statistics_fwd.hpp>
22
#include <boost/accumulators/statistics/count.hpp>
23
#include <boost/accumulators/statistics/sum.hpp>
24
#include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
25

    
26
namespace boost { namespace accumulators
27
{
28

    
29
namespace impl {
30
    ///////////////////////////////////////////////////////////////////////////////
31
    // weighted_p_square_quantile_impl
32
    //  single quantile estimation with weighted samples
33
    /**
34
        @brief Single quantile estimation with the \f$P^2\f$ algorithm for weighted samples
35

36
        This version of the \f$P^2\f$ algorithm extends the \f$P^2\f$ algorithm to support weighted samples.
37
        The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of
38
        storing the whole sample cumulative distribution, only five points (markers) are stored. The heights
39
        of these markers are the minimum and the maximum of the samples and the current estimates of the
40
        \f$(p/2)\f$-, \f$p\f$ - and \f$(1+p)/2\f$ -quantiles. Their positions are equal to the number
41
        of samples that are smaller or equal to the markers. Each time a new sample is added, the
42
        positions of the markers are updated and if necessary their heights are adjusted using a piecewise-
43
        parabolic formula.
44

45
        For further details, see
46

47
        R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
48
        histograms without storing observations, Communications of the ACM,
49
        Volume 28 (October), Number 10, 1985, p. 1076-1085.
50

51
        @param quantile_probability
52
    */
53
    template<typename Sample, typename Weight, typename Impl>
54
    struct weighted_p_square_quantile_impl
55
      : accumulator_base
56
    {
57
        typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
58
        typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
59
        typedef array<float_type, 5> array_type;
60
        // for boost::result_of
61
        typedef float_type result_type;
62

    
63
        template<typename Args>
64
        weighted_p_square_quantile_impl(Args const &args)
65
          : p(is_same<Impl, for_median>::value ? 0.5 : args[quantile_probability | 0.5])
66
          , heights()
67
          , actual_positions()
68
          , desired_positions()
69
        {
70
        }
71

    
72
        template<typename Args>
73
        void operator ()(Args const &args)
74
        {
75
            std::size_t cnt = count(args);
76

    
77
            // accumulate 5 first samples
78
            if (cnt <= 5)
79
            {
80
                this->heights[cnt - 1] = args[sample];
81

    
82
                // In this initialization phase, actual_positions stores the weights of the
83
                // initial samples that are needed at the end of the initialization phase to
84
                // compute the correct initial positions of the markers.
85
                this->actual_positions[cnt - 1] = args[weight];
86

    
87
                // complete the initialization of heights and actual_positions by sorting
88
                if (cnt == 5)
89
                {
90
                    // TODO: we need to sort the initial samples (in heights) in ascending order and
91
                    // sort their weights (in actual_positions) the same way. The following lines do
92
                    // it, but there must be a better and more efficient way of doing this.
93
                    typename array_type::iterator it_begin, it_end, it_min;
94

    
95
                    it_begin = this->heights.begin();
96
                    it_end   = this->heights.end();
97

    
98
                    std::size_t pos = 0;
99

    
100
                    while (it_begin != it_end)
101
                    {
102
                        it_min = std::min_element(it_begin, it_end);
103
                        std::size_t d = std::distance(it_begin, it_min);
104
                        std::swap(*it_begin, *it_min);
105
                        std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
106
                        ++it_begin;
107
                        ++pos;
108
                    }
109

    
110
                    // calculate correct initial actual positions
111
                    for (std::size_t i = 1; i < 5; ++i)
112
                    {
113
                        this->actual_positions[i] += this->actual_positions[i - 1];
114
                    }
115
                }
116
            }
117
            else
118
            {
119
                std::size_t sample_cell = 1; // k
120

    
121
                // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
122
                if (args[sample] < this->heights[0])
123
                {
124
                    this->heights[0] = args[sample];
125
                    this->actual_positions[0] = args[weight];
126
                    sample_cell = 1;
127
                }
128
                else if (this->heights[4] <= args[sample])
129
                {
130
                    this->heights[4] = args[sample];
131
                    sample_cell = 4;
132
                }
133
                else
134
                {
135
                    typedef typename array_type::iterator iterator;
136
                    iterator it = std::upper_bound(
137
                        this->heights.begin()
138
                      , this->heights.end()
139
                      , args[sample]
140
                    );
141

    
142
                    sample_cell = std::distance(this->heights.begin(), it);
143
                }
144

    
145
                // increment positions of markers above sample_cell
146
                for (std::size_t i = sample_cell; i < 5; ++i)
147
                {
148
                    this->actual_positions[i] += args[weight];
149
                }
150

    
151
                // update desired positions for all markers
152
                this->desired_positions[0] = this->actual_positions[0];
153
                this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0])
154
                                           * this->p/2. + this->actual_positions[0];
155
                this->desired_positions[2] = (sum_of_weights(args) - this->actual_positions[0])
156
                                           * this->p + this->actual_positions[0];
157
                this->desired_positions[3] = (sum_of_weights(args) - this->actual_positions[0])
158
                                           * (1. + this->p)/2. + this->actual_positions[0];
159
                this->desired_positions[4] = sum_of_weights(args);
160

    
161
                // adjust height and actual positions of markers 1 to 3 if necessary
162
                for (std::size_t i = 1; i <= 3; ++i)
163
                {
164
                    // offset to desired positions
165
                    float_type d = this->desired_positions[i] - this->actual_positions[i];
166

    
167
                    // offset to next position
168
                    float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
169

    
170
                    // offset to previous position
171
                    float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
172

    
173
                    // height ds
174
                    float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
175
                    float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
176

    
177
                    if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) )
178
                    {
179
                        short sign_d = static_cast<short>(d / std::abs(d));
180

    
181
                        // try adjusting heights[i] using p-squared formula
182
                        float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm );
183

    
184
                        if ( this->heights[i - 1] < h && h < this->heights[i + 1] )
185
                        {
186
                            this->heights[i] = h;
187
                        }
188
                        else
189
                        {
190
                            // use linear formula
191
                            if (d>0)
192
                            {
193
                                this->heights[i] += hp;
194
                            }
195
                            if (d<0)
196
                            {
197
                                this->heights[i] -= hm;
198
                            }
199
                        }
200
                        this->actual_positions[i] += sign_d;
201
                    }
202
                }
203
            }
204
        }
205

    
206
        result_type result(dont_care) const
207
        {
208
            return this->heights[2];
209
        }
210

    
211
    private:
212
        float_type p;                    // the quantile probability p
213
        array_type heights;              // q_i
214
        array_type actual_positions;     // n_i
215
        array_type desired_positions;    // n'_i
216
    };
217

    
218
} // namespace impl
219

    
220
///////////////////////////////////////////////////////////////////////////////
221
// tag::weighted_p_square_quantile
222
//
223
namespace tag
224
{
225
    struct weighted_p_square_quantile
226
      : depends_on<count, sum_of_weights>
227
    {
228
        typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, regular> impl;
229
    };
230
    struct weighted_p_square_quantile_for_median
231
      : depends_on<count, sum_of_weights>
232
    {
233
        typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, for_median> impl;
234
    };
235
}
236

    
237
///////////////////////////////////////////////////////////////////////////////
238
// extract::weighted_p_square_quantile
239
// extract::weighted_p_square_quantile_for_median
240
//
241
namespace extract
242
{
243
    extractor<tag::weighted_p_square_quantile> const weighted_p_square_quantile = {};
244
    extractor<tag::weighted_p_square_quantile_for_median> const weighted_p_square_quantile_for_median = {};
245

    
246
    BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile)
247
    BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_median)
248
}
249

    
250
using extract::weighted_p_square_quantile;
251
using extract::weighted_p_square_quantile_for_median;
252

    
253
}} // namespace boost::accumulators
254

    
255
#endif