Statistics
| Revision:

root / tmp / org.txm.statsengine.r.core.win32 / res / win32 / library / BH / include / boost / accumulators / statistics / weighted_density.hpp @ 2486

History | View | Annotate | Download (8.8 kB)

1
///////////////////////////////////////////////////////////////////////////////
2
// weighted_density.hpp
3
//
4
//  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5
//  Software License, Version 1.0. (See accompanying file
6
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7

    
8
#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
9
#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
10

    
11
#include <vector>
12
#include <limits>
13
#include <functional>
14
#include <boost/range.hpp>
15
#include <boost/parameter/keyword.hpp>
16
#include <boost/mpl/placeholders.hpp>
17
#include <boost/accumulators/framework/accumulator_base.hpp>
18
#include <boost/accumulators/framework/extractor.hpp>
19
#include <boost/accumulators/numeric/functional.hpp>
20
#include <boost/accumulators/framework/parameters/sample.hpp>
21
#include <boost/accumulators/statistics_fwd.hpp>
22
#include <boost/accumulators/statistics/sum.hpp>
23
#include <boost/accumulators/statistics/max.hpp>
24
#include <boost/accumulators/statistics/min.hpp>
25
#include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins
26

    
27
namespace boost { namespace accumulators
28
{
29

    
30
namespace impl
31
{
32
    ///////////////////////////////////////////////////////////////////////////////
33
    // weighted_density_impl
34
    //  density histogram for weighted samples
35
    /**
36
        @brief Histogram density estimator for weighted samples
37

38
        The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
39
        are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
40
        maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
41
        an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
42
        the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
43
        returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the
44
        sum of all weights).
45

46
        @param density_cache_size Number of first samples used to determine min and max.
47
        @param density_num_bins Number of bins (two additional bins collect under- and overflow samples).
48
    */
49
    template<typename Sample, typename Weight>
50
    struct weighted_density_impl
51
      : accumulator_base
52
    {
53
        typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type;
54
        typedef std::vector<std::pair<float_type, float_type> > histogram_type;
55
        typedef std::vector<float_type> array_type;
56
        // for boost::result_of
57
        typedef iterator_range<typename histogram_type::iterator> result_type;
58

    
59
        template<typename Args>
60
        weighted_density_impl(Args const &args)
61
            : cache_size(args[density_cache_size])
62
            , cache(cache_size)
63
            , num_bins(args[density_num_bins])
64
            , samples_in_bin(num_bins + 2, 0.)
65
            , bin_positions(num_bins + 2)
66
            , histogram(
67
                num_bins + 2
68
              , std::make_pair(
69
                    numeric::fdiv(args[sample | Sample()],(std::size_t)1)
70
                  , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
71
                )
72
              )
73
            , is_dirty(true)
74
        {
75
        }
76

    
77
        template<typename Args>
78
        void operator ()(Args const &args)
79
        {
80
            this->is_dirty = true;
81

    
82
            std::size_t cnt = count(args);
83

    
84
            // Fill up cache with cache_size first samples
85
            if (cnt <= this->cache_size)
86
            {
87
                this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]);
88
            }
89

    
90
            // Once cache_size samples have been accumulated, create num_bins bins of same size between
91
            // the minimum and maximum of the cached samples as well as an under- and an overflow bin.
92
            // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
93
            if (cnt == this->cache_size)
94
            {
95
                float_type minimum = numeric::fdiv((min)(args),(std::size_t)1);
96
                float_type maximum = numeric::fdiv((max)(args),(std::size_t)1);
97
                float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins);
98

    
99
                // determine bin positions (their lower bounds)
100
                for (std::size_t i = 0; i < this->num_bins + 2; ++i)
101
                {
102
                    this->bin_positions[i] = minimum + (i - 1.) * bin_size;
103
                }
104

    
105
                for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
106
                {
107
                    if (iter->first < this->bin_positions[1])
108
                    {
109
                        this->samples_in_bin[0] += iter->second;
110
                    }
111
                    else if (iter->first >= this->bin_positions[this->num_bins + 1])
112
                    {
113
                        this->samples_in_bin[this->num_bins + 1] += iter->second;
114
                    }
115
                    else
116
                    {
117
                        typename array_type::iterator it = std::upper_bound(
118
                            this->bin_positions.begin()
119
                          , this->bin_positions.end()
120
                          , iter->first
121
                        );
122

    
123
                        std::size_t d = std::distance(this->bin_positions.begin(), it);
124
                        this->samples_in_bin[d - 1] += iter->second;
125
                    }
126
                }
127
            }
128
            // Add each subsequent sample to the correct bin
129
            else if (cnt > this->cache_size)
130
            {
131
                if (args[sample] < this->bin_positions[1])
132
                {
133
                    this->samples_in_bin[0] += args[weight];
134
                }
135
                else if (args[sample] >= this->bin_positions[this->num_bins + 1])
136
                {
137
                    this->samples_in_bin[this->num_bins + 1] += args[weight];
138
                }
139
                else
140
                {
141
                    typename array_type::iterator it = std::upper_bound(
142
                        this->bin_positions.begin()
143
                      , this->bin_positions.end()
144
                      , args[sample]
145
                    );
146

    
147
                    std::size_t d = std::distance(this->bin_positions.begin(), it);
148
                    this->samples_in_bin[d - 1] += args[weight];
149
                }
150
            }
151
        }
152

    
153
        template<typename Args>
154
        result_type result(Args const &args) const
155
        {
156
            if (this->is_dirty)
157
            {
158
                this->is_dirty = false;
159

    
160
                // creates a vector of std::pair where each pair i holds
161
                // the values bin_positions[i] (x-axis of histogram) and
162
                // samples_in_bin[i] / cnt (y-axis of histogram).
163

    
164
                for (std::size_t i = 0; i < this->num_bins + 2; ++i)
165
                {
166
                    this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args)));
167
                }
168
            }
169

    
170
            // returns a range of pairs
171
            return make_iterator_range(this->histogram);
172
        }
173

    
174
    private:
175
        std::size_t            cache_size;      // number of cached samples
176
        histogram_type         cache;           // cache to store the first cache_size samples with their weights as std::pair
177
        std::size_t            num_bins;        // number of bins
178
        array_type             samples_in_bin;  // number of samples in each bin
179
        array_type             bin_positions;   // lower bounds of bins
180
        mutable histogram_type histogram;       // histogram
181
        mutable bool is_dirty;
182
    };
183

    
184
} // namespace impl
185

    
186
///////////////////////////////////////////////////////////////////////////////
187
// tag::weighted_density
188
//
189
namespace tag
190
{
191
    struct weighted_density
192
      : depends_on<count, sum_of_weights, min, max>
193
      , density_cache_size
194
      , density_num_bins
195
    {
196
        /// INTERNAL ONLY
197
        ///
198
        typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl;
199

    
200
        #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
201
        static boost::parameter::keyword<density_cache_size> const cache_size;
202
        static boost::parameter::keyword<density_num_bins> const num_bins;
203
        #endif
204
    };
205
}
206

    
207
///////////////////////////////////////////////////////////////////////////////
208
// extract::weighted_density
209
//
210
namespace extract
211
{
212
    extractor<tag::density> const weighted_density = {};
213

    
214
    BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density)
215
}
216

    
217
using extract::weighted_density;
218

    
219
}} // namespace boost::accumulators
220

    
221
#endif