Statistics
| Revision:

root / tmp / org.txm.statsengine.r.core.win32 / res / win32 / library / BH / include / boost / accumulators / statistics / density.hpp @ 2486

History | View | Annotate | Download (9.3 kB)

1

    
2
///////////////////////////////////////////////////////////////////////////////
3
// density.hpp
4
//
5
//  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
6
//  Software License, Version 1.0. (See accompanying file
7
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8

    
9
#ifndef BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006
10
#define BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006
11

    
12
#include <vector>
13
#include <limits>
14
#include <functional>
15
#include <boost/range.hpp>
16
#include <boost/parameter/keyword.hpp>
17
#include <boost/mpl/placeholders.hpp>
18
#include <boost/accumulators/accumulators_fwd.hpp>
19
#include <boost/accumulators/framework/accumulator_base.hpp>
20
#include <boost/accumulators/framework/extractor.hpp>
21
#include <boost/accumulators/numeric/functional.hpp>
22
#include <boost/accumulators/framework/parameters/sample.hpp>
23
#include <boost/accumulators/framework/depends_on.hpp>
24
#include <boost/accumulators/statistics_fwd.hpp>
25
#include <boost/accumulators/statistics/count.hpp>
26
#include <boost/accumulators/statistics/max.hpp>
27
#include <boost/accumulators/statistics/min.hpp>
28

    
29
namespace boost { namespace accumulators
30
{
31

    
32
///////////////////////////////////////////////////////////////////////////////
33
// cache_size and num_bins named parameters
34
//
35
BOOST_PARAMETER_NESTED_KEYWORD(tag, density_cache_size, cache_size)
36
BOOST_PARAMETER_NESTED_KEYWORD(tag, density_num_bins, num_bins)
37

    
38
BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_cache_size)
39
BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_num_bins)
40

    
41
namespace impl
42
{
43
    ///////////////////////////////////////////////////////////////////////////////
44
    // density_impl
45
    //  density histogram
46
    /**
47
        @brief Histogram density estimator
48

49
        The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
50
        are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
51
        maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
52
        an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
53
        the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
54
        return, where each pair contains the position of the bin (lower bound) and the samples count (normalized with the
55
        total number of samples).
56

57
        @param  density_cache_size Number of first samples used to determine min and max.
58
        @param  density_num_bins Number of bins (two additional bins collect under- and overflow samples).
59
    */
60
    template<typename Sample>
61
    struct density_impl
62
      : accumulator_base
63
    {
64
        typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
65
        typedef std::vector<std::pair<float_type, float_type> > histogram_type;
66
        typedef std::vector<float_type> array_type;
67
        // for boost::result_of
68
        typedef iterator_range<typename histogram_type::iterator> result_type;
69

    
70
        template<typename Args>
71
        density_impl(Args const &args)
72
            : cache_size(args[density_cache_size])
73
            , cache(cache_size)
74
            , num_bins(args[density_num_bins])
75
            , samples_in_bin(num_bins + 2, 0.)
76
            , bin_positions(num_bins + 2)
77
            , histogram(
78
                num_bins + 2
79
              , std::make_pair(
80
                    numeric::fdiv(args[sample | Sample()],(std::size_t)1)
81
                  , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
82
                )
83
              )
84
            , is_dirty(true)
85
        {
86
        }
87

    
88
        template<typename Args>
89
        void operator ()(Args const &args)
90
        {
91
            this->is_dirty = true;
92

    
93
            std::size_t cnt = count(args);
94

    
95
            // Fill up cache with cache_size first samples
96
            if (cnt <= this->cache_size)
97
            {
98
                this->cache[cnt - 1] = args[sample];
99
            }
100

    
101
            // Once cache_size samples have been accumulated, create num_bins bins of same size between
102
            // the minimum and maximum of the cached samples as well as under and overflow bins.
103
            // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
104
            if (cnt == this->cache_size)
105
            {
106
                float_type minimum = numeric::fdiv((min)(args), (std::size_t)1);
107
                float_type maximum = numeric::fdiv((max)(args), (std::size_t)1);
108
                float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins );
109

    
110
                // determine bin positions (their lower bounds)
111
                for (std::size_t i = 0; i < this->num_bins + 2; ++i)
112
                {
113
                    this->bin_positions[i] = minimum + (i - 1.) * bin_size;
114
                }
115

    
116
                for (typename array_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
117
                {
118
                    if (*iter < this->bin_positions[1])
119
                    {
120
                        ++(this->samples_in_bin[0]);
121
                    }
122
                    else if (*iter >= this->bin_positions[this->num_bins + 1])
123
                    {
124
                        ++(this->samples_in_bin[this->num_bins + 1]);
125
                    }
126
                    else
127
                    {
128
                        typename array_type::iterator it = std::upper_bound(
129
                            this->bin_positions.begin()
130
                          , this->bin_positions.end()
131
                          , *iter
132
                        );
133

    
134
                        std::size_t d = std::distance(this->bin_positions.begin(), it);
135
                        ++(this->samples_in_bin[d - 1]);
136
                    }
137
                }
138
            }
139
            // Add each subsequent sample to the correct bin
140
            else if (cnt > this->cache_size)
141
            {
142
                if (args[sample] < this->bin_positions[1])
143
                {
144
                    ++(this->samples_in_bin[0]);
145
                }
146
                else if (args[sample] >= this->bin_positions[this->num_bins + 1])
147
                {
148
                    ++(this->samples_in_bin[this->num_bins + 1]);
149
                }
150
                else
151
                {
152
                    typename array_type::iterator it = std::upper_bound(
153
                        this->bin_positions.begin()
154
                      , this->bin_positions.end()
155
                      , args[sample]
156
                    );
157

    
158
                    std::size_t d = std::distance(this->bin_positions.begin(), it);
159
                    ++(this->samples_in_bin[d - 1]);
160
                }
161
            }
162
        }
163

    
164
        /**
165
            @pre The number of samples must meet or exceed the cache size
166
        */
167
        template<typename Args>
168
        result_type result(Args const &args) const
169
        {
170
            if (this->is_dirty)
171
            {
172
                this->is_dirty = false;
173

    
174
                // creates a vector of std::pair where each pair i holds
175
                // the values bin_positions[i] (x-axis of histogram) and
176
                // samples_in_bin[i] / cnt (y-axis of histogram).
177

    
178
                for (std::size_t i = 0; i < this->num_bins + 2; ++i)
179
                {
180
                    this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], count(args)));
181
                }
182
            }
183
            // returns a range of pairs
184
            return make_iterator_range(this->histogram);
185
        }
186

    
187
    private:
188
        std::size_t            cache_size;      // number of cached samples
189
        array_type             cache;           // cache to store the first cache_size samples
190
        std::size_t            num_bins;        // number of bins
191
        array_type             samples_in_bin;  // number of samples in each bin
192
        array_type             bin_positions;   // lower bounds of bins
193
        mutable histogram_type histogram;       // histogram
194
        mutable bool is_dirty;
195
    };
196

    
197
} // namespace impl
198

    
199
///////////////////////////////////////////////////////////////////////////////
200
// tag::density
201
//
202
namespace tag
203
{
204
    struct density
205
      : depends_on<count, min, max>
206
      , density_cache_size
207
      , density_num_bins
208
    {
209
        /// INTERNAL ONLY
210
        ///
211
        typedef accumulators::impl::density_impl<mpl::_1> impl;
212

    
213
        #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
214
        /// tag::density::cache_size named parameter
215
        /// tag::density::num_bins named parameter
216
        static boost::parameter::keyword<density_cache_size> const cache_size;
217
        static boost::parameter::keyword<density_num_bins> const num_bins;
218
        #endif
219
    };
220
}
221

    
222
///////////////////////////////////////////////////////////////////////////////
223
// extract::density
224
//
225
namespace extract
226
{
227
    extractor<tag::density> const density = {};
228

    
229
    BOOST_ACCUMULATORS_IGNORE_GLOBAL(density)
230
}
231

    
232
using extract::density;
233

    
234
// So that density can be automatically substituted
235
// with weighted_density when the weight parameter is non-void.
236
template<>
237
struct as_weighted_feature<tag::density>
238
{
239
    typedef tag::weighted_density type;
240
};
241

    
242
template<>
243
struct feature_of<tag::weighted_density>
244
  : feature_of<tag::density>
245
{
246
};
247

    
248
}} // namespace boost::accumulators
249

    
250
#endif