OPALX (Object Oriented Parallel Accelerator Library for Exascal) MINIorX
OPALX
BinHisto.tpp
Go to the documentation of this file.
1#ifndef BINHISTO_TPP
2#define BINHISTO_TPP
3
4#include "BinHisto.h"
5
6// #include <random>
7
8namespace ParticleBinning {
9
10 template <typename size_type, typename bin_index_type, typename value_type, bool UseDualView, class... Properties>
11 void Histogram<size_type, bin_index_type, value_type, UseDualView, Properties...>::copyFields(const Histogram& other) {
12 debug_name_m = other.debug_name_m;
13 numBins_m = other.numBins_m;
14 totalBinWidth_m = other.totalBinWidth_m;
15
16 binningAlpha_m = other.binningAlpha_m;
17 binningBeta_m = other.binningBeta_m;
18 desiredWidth_m = other.desiredWidth_m;
19
20 histogram_m = other.histogram_m;
21 binWidths_m = other.binWidths_m;
22 postSum_m = other.postSum_m;
23
24 initTimers();
25 }
26
27
28 template <typename size_type, typename bin_index_type, typename value_type, bool UseDualView, class... Properties>
29 value_type
30 Histogram<size_type, bin_index_type, value_type, UseDualView, Properties...>::adaptiveBinningCostFunction(
31 const size_type& sumCount,
32 const value_type& sumWidth,
33 const size_type& totalNumParticles
34 ) {
35 # ifdef DEBUG
36 if (sumCount == 0) {
37 Inform err("mergeBins");
38 err << "Error in adaptiveBinningCostFunction: "
39 << "sumCount = " << sumCount
40 << ", sumWidth = " << sumWidth
41 << ", totalNumParticles = " << totalNumParticles << endl;
42 ippl::Comm->abort();
43 }
44 # endif
45
46 value_type totalSum = static_cast<value_type>(totalNumParticles);
47 value_type sumCountNorm = sumCount / totalSum;
48
49 value_type penalty = sumWidth - desiredWidth_m;
50 value_type wideBinPenalty = binningAlpha_m;
51 value_type binSizeBias = binningBeta_m;
52
53 value_type sparse_penalty = (sumCountNorm < desiredWidth_m)
54 ? desiredWidth_m / sumCountNorm
55 : 0.0;
56
57 // Sanity checks
58 if (sumCountNorm <= 0 || sumCountNorm > 1) {
59 Inform err("mergeBins");
60 err << "Error in adaptiveBinningCostFunction: "
61 << "sumCountNorm = " << sumCountNorm
62 << ", sumWidth = " << sumWidth
63 << ", totalNumParticles = " << totalNumParticles << endl;
64 ippl::Comm->abort();
65 }
66
67 return sumCountNorm*log(sumCountNorm)*sumWidth // minimize shannon entropy as a basis
68 + wideBinPenalty * pow(sumWidth, 2) // >0 wants smallest possible bin
69 // <0 wants largest possible bin
70 // Use ^2 to make it reasonably sensitive
71 + binSizeBias * pow(penalty, 2) // bias towards desiredWidth
72 + sparse_penalty; // penalty for too few particles (specifically "distribution tails")
73 }
74
75
76 template <typename size_type, typename bin_index_type, typename value_type, bool UseDualView, class... Properties>
77 Histogram<size_type, bin_index_type, value_type, UseDualView, Properties...>::hindex_transform_type
78 Histogram<size_type, bin_index_type, value_type, UseDualView, Properties...>::mergeBins() {
79 Inform m("Histogram");
80
81 /*
82 The following if makes sure that the mergeBins function is only called if the histogram is
83 actually available on host!.
84 */
85 if constexpr (!std::is_same<typename hview_type::memory_space, Kokkos::HostSpace>::value) {
86 m << "This does not work if the histogram is not saved in a DualView, since it needs host access to the data." << endl;
87 ippl::Comm->abort();
88 return hindex_transform_type("error", 0);
89 }
90
91 // Get host views
92 hview_type oldHistHost = getHostView<hview_type>(histogram_m);
93 hwidth_view_type oldBinWHost = getHostView<hwidth_view_type>(binWidths_m);
94
95 const bin_index_type n = numBins_m;
96 if (n < 2) {
97 // Should not happen, since this function is to be called after generating a very fine histogram, e.g. 128 bins
98 m << "Not merging, since n_bins = " << n << " is too small!" << endl;
99 hindex_transform_type oldToNewBinsView("oldToNewBinsView", n);
100 Kokkos::deep_copy(oldToNewBinsView, 0);
101 return oldToNewBinsView;
102 }
103
104
105 IpplTimings::startTimer(bMergeBinsT);
106 // ----------------------------------------------------------------
107 // 1) Build prefix sums on the host
108 // prefixCount[k] = sum of counts in bins [0..k-1]
109 // prefixWidth[k] = sum of widths in bins [0..k-1]
110 // ----------------------------------------------------------------
111 hview_type prefixCount("prefixCount", n+1);
112 hwidth_view_type prefixWidth("prefixWidth", n+1);
113
114 prefixCount(0) = 0;
115 prefixWidth(0) = 0;
116
117 for (bin_index_type i = 0; i < n; ++i) {
118 prefixCount(i+1) = prefixCount(i) + oldHistHost(i);
119 prefixWidth(i+1) = prefixWidth(i) + oldBinWHost(i);
120 }
121 const size_type totalNumParticles = prefixCount(n); // Last value in prefixCount is the total number of particles
122
123
124 // ----------------------------------------------------------------
125 // 2) Dynamic Programming arrays:
126 // dp(k) = minimal total cost covering [0..k-1]
127 // prevIdx(k) = the index i that yields that minimal cost
128 // ----------------------------------------------------------------
129 Kokkos::View<value_type*, Kokkos::HostSpace> dp("dp", n+1);
130 // Kokkos::View<value_type*, Kokkos::HostSpace> dpMoment("dpMoment", n+1); // Store cumulative moments --> allow first order moment error estimation
131 Kokkos::View<int*, Kokkos::HostSpace> prevIdx("prevIdx", n+1);
132
133 // Initialize dp with something large.
134 // Note that it is divided by 2 to prevent and overflow should sumCount==0 and dp(i)!=0
135 // in the following dp loop (step 3).
136 // Also: /2 is fine, since dp(i) should never be "largeValue". Since this can only happen
137 // for empty binning configurations. Since the first bin always contains a particle (binning
138 // is done between min/max values) and since the function is short circuited should there be
139 // less than 2 particles, we can assume that dp(i) will never remain "largeValue" (note: i<k
140 // in the dp loop!).
141 value_type largeVal = std::numeric_limits<value_type>::max() / value_type(2);
142 for (bin_index_type k = 0; k <= n; ++k) {
143 dp(k) = largeVal;
144 // dpMoment(k) = 0;
145 prevIdx(k) = -1;
146 }
147 dp(0) = value_type(0); // 0 cost to cover an empty set
148 //m << "DP arrays initialized." << endl;
149
150
151 // ----------------------------------------------------------------
152 // 3) Fill dp with an O(n^2) algorithm to find the minimal total cost
153 // ----------------------------------------------------------------
154 for (bin_index_type k = 1; k <= n; ++k) {
155 // Try all possible start indices i for the last merged bin
156 for (bin_index_type i = 0; i < k; ++i) {
157 size_type sumCount = prefixCount(k) - prefixCount(i);
158 value_type sumWidth = prefixWidth(k) - prefixWidth(i);
159 value_type segCost = largeVal;
160 if (sumCount > 0) {
161 // Division by 0 if called with sumCount=0. Merge anyways --> set cost to a "very large" value instead
162 segCost = adaptiveBinningCostFunction(sumCount, sumWidth, totalNumParticles);
163 }
164 value_type candidate = dp(i) + segCost;
165 if (candidate < dp(k)) {
166 dp(k) = candidate;
167 prevIdx(k) = i;
168 }
169 }
170 }
171 //m << "DP arrays filled." << endl;
172
173 // dp(n) is the minimal total cost for covering [0..n-1].
174 value_type totalCost = dp(n);
175 if (totalCost >= largeVal) {
176 // Means everything was effectively "impossible" => fallback (should not happen!)
177 std::cerr << "Warning: no feasible merges found. Setting cost=0, no merges." << std::endl;
178 totalCost = value_type(0);
179 }
180
181
182 // ----------------------------------------------------------------
183 // 4) Reconstruct boundaries from prevIdx
184 // We start from k=n and step backwards until k=0
185 // ----------------------------------------------------------------
186 std::vector<int> boundaries;
187 boundaries.reserve(20); // should be sufficient for most use cases (usually aim for <10)
188 int cur = n;
189 // We'll just push them in reverse
190 while (cur > 0) {
191 int start = prevIdx(cur);
192 if (start < 0) {
193 std::cerr << "Error: prevIdx(" << cur << ") < 0. "
194 << "Merging not successful, aborted loop." << std::endl;
195 // fallback, break out
196 break;
197 }
198 boundaries.push_back(start);
199 cur = start;
200 }
201 // boundaries are reversed (e.g. [startK, i2, i1, 0])
202 std::reverse(boundaries.begin(), boundaries.end());
203 // final boundary is n
204 boundaries.push_back(n);
205
206 // Now the number of merged bins is boundaries.size() - 1
207 size_type mergedBinsCount = static_cast<size_type>(boundaries.size()) - 1;
208 //m << "Merged bins (based on minimal cost partition): " << mergedBinsCount << ". Minimal total cost = " << totalCost << endl;
209
210
211 // ----------------------------------------------------------------
212 // 5) Build new arrays for the merged bins
213 // ----------------------------------------------------------------
214 Kokkos::View<size_type*, Kokkos::HostSpace> newCounts("newCounts", mergedBinsCount);
215 Kokkos::View<value_type*, Kokkos::HostSpace> newWidths("newWidths", mergedBinsCount);
216
217 for (size_type j = 0; j < mergedBinsCount; ++j) {
218 bin_index_type start = boundaries[j];
219 bin_index_type end = boundaries[j+1] - 1; // inclusive
220 size_type sumCount = prefixCount(end+1) - prefixCount(start);
221 value_type sumWidth = prefixWidth(end+1) - prefixWidth(start);
222 newCounts(j) = sumCount;
223 newWidths(j) = sumWidth;
224 }
225 //m << "New bins computed." << endl;
226
227 // Also generate a lookup table that maps the old bin index to the new bin index (for "rebin")
228 hindex_transform_type oldToNewBinsView("oldToNewBinsView", n);
229 for (size_type j = 0; j < mergedBinsCount; ++j) {
230 bin_index_type startIdx = boundaries[j];
231 bin_index_type endIdx = boundaries[j+1]; // exclusive
232 for (bin_index_type i = startIdx; i < endIdx; ++i) {
233 oldToNewBinsView(i) = j;
234 }
235 }
236 //m << "Lookup table generated." << endl;
237
238
239 // ----------------------------------------------------------------
240 // 6) Overwrite the old histogram arrays with the new merged ones
241 // ----------------------------------------------------------------
242 numBins_m = static_cast<bin_index_type>(mergedBinsCount);
243 instantiateHistograms();
244 //m << "New histograms instantiated." << endl;
245
246 // Copy the data into the new Kokkos Views (on host)
247 hview_type newHistHost = getHostView<hview_type>(histogram_m);
248 hwidth_view_type newWidthHost = getHostView<hwidth_view_type>(binWidths_m);
249 Kokkos::deep_copy(newHistHost, newCounts);
250 Kokkos::deep_copy(newWidthHost, newWidths);
251 //m << "New histograms filled." << endl;
252
253
254 // ----------------------------------------------------------------
255 // 7) If using DualView, mark host as modified & sync
256 // ----------------------------------------------------------------
257 if constexpr (UseDualView) {
258 modify_host();
259 sync();
260
261 binWidths_m.modify_host();
262
263 IpplTimings::startTimer(bDeviceSyncronizationT);
264 binWidths_m.sync_device();
265 IpplTimings::stopTimer(bDeviceSyncronizationT);
266 //m << "Host views modified/synced." << endl;
267 }
268
269
270 // ----------------------------------------------------------------
271 // 8) Recompute postSum for the new merged histogram
272 // ----------------------------------------------------------------
273 initPostSum();
274 IpplTimings::stopTimer(bMergeBinsT);
275
276 m << "Re-binned from " << n << " bins down to "
277 << numBins_m << " bins. Total deviation cost = "
278 << totalCost << endl;
279
280 // Return the old->new index transform
281 return oldToNewBinsView;
282 }
283
284
285} // namespace ParticleBinning
286
287#endif // BINHISTO_TPP