IPPL (Independent Parallel Particle Layer)
IPPL
Loading...
Searching...
No Matches
LandauDampingMixedPrecision.cpp
Go to the documentation of this file.
1// Landau Damping Test, variant with mixed precision.
2// In order to avoid excessive error when scattering from grid-points to the particles,
3// the charge and the scalar field are kept in double precision. The Mesh object is also
4// in double precision, as it leads to a higher precision without affecting memory negatively.
5// Everything else (namely the vector field E and the particle position) are in single
6// precision, since this choice increases memory savings without losing precision.
7// Usage:
8// srun ./LandauDamping
9// <nx> [<ny>...] <Np> <Nt> <stype>
10// <lbthres> --overallocate <ovfactor> --info 10
11// nx = No. cell-centered points in the x-direction
12// ny... = No. cell-centered points in the y-, z-, ...-direction
13// Np = Total no. of macro-particles in the simulation
14// Nt = Number of time steps
15// stype = Field solver type e.g., FFT
16// lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance
17// percentage which can be tolerated and beyond which
18// particle load balancing occurs. A value of 0.01 is good for many typical
19// simulations.
20// ovfactor = Over-allocation factor for the buffers used in the communication. Typical
21// values are 1.0, 2.0. Value 1.0 means no over-allocation.
22// Example:
23// srun ./LandauDamping 128 128 128 10000 10 FFT 0.01 2.0 --info 10
24//
25
26#include <Kokkos_MathematicalConstants.hpp>
27#include <Kokkos_MathematicalFunctions.hpp>
28#include <Kokkos_Random.hpp>
29#include <chrono>
30#include <iostream>
31#include <random>
32#include <set>
33#include <string>
34#include <vector>
35
36#include "Utility/IpplTimings.h"
37
38#include "ChargedParticles.hpp"
39
40constexpr unsigned Dim = 3;
41
42template <typename T>
43struct Newton1D {
44 double tol = 1e-12;
45 int max_iter = 20;
46 T pi = Kokkos::numbers::pi_v<T>;
47
48 T k, alpha, u;
49
50 KOKKOS_INLINE_FUNCTION Newton1D() {}
51
52 KOKKOS_INLINE_FUNCTION Newton1D(const T& k_, const T& alpha_, const T& u_)
53 : k(k_)
54 , alpha(alpha_)
55 , u(u_) {}
56
57 KOKKOS_INLINE_FUNCTION ~Newton1D() {}
58
59 KOKKOS_INLINE_FUNCTION T f(T& x) {
60 T F;
61 F = x + (alpha * (Kokkos::sin(k * x) / k)) - u;
62 return F;
63 }
64
65 KOKKOS_INLINE_FUNCTION T fprime(T& x) {
66 T Fprime;
67 Fprime = 1 + (alpha * Kokkos::cos(k * x));
68 return Fprime;
69 }
70
71 KOKKOS_FUNCTION
72 void solve(T& x) {
73 int iterations = 0;
74 while (iterations < max_iter && Kokkos::fabs(f(x)) > tol) {
75 x = x - (f(x) / fprime(x));
76 iterations += 1;
77 }
78 }
79};
80
81template <typename T, class GeneratorPool, unsigned Dim>
82struct generate_random {
84 using value_type = typename T::value_type;
85 // Output View for the random numbers
86 view_type x, v;
87
88 // The GeneratorPool
89 GeneratorPool rand_pool;
90
92
93 T k, minU, maxU;
94
95 // Initialize all members
96 generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, value_type& alpha_, T& k_,
97 T& minU_, T& maxU_)
98 : x(x_)
99 , v(v_)
100 , rand_pool(rand_pool_)
101 , alpha(alpha_)
102 , k(k_)
103 , minU(minU_)
104 , maxU(maxU_) {}
105
106 KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const {
107 // Get a random number state from the pool for the active thread
108 typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
109
110 value_type u;
111 for (unsigned d = 0; d < Dim; ++d) {
112 u = rand_gen.drand(minU[d], maxU[d]);
113 x(i)[d] = u / (1 + alpha);
114 Newton1D<value_type> solver(k[d], alpha, u);
115 solver.solve(x(i)[d]);
116 v(i)[d] = rand_gen.normal(0.0, 1.0);
117 }
118
119 // Give the state back, which will allow another thread to acquire it
120 rand_pool.free_state(rand_gen);
121 }
122};
123
// Cumulative distribution function of the perturbed density 1 + alpha*cos(k*x):
// CDF(x) = x + (alpha/k) * sin(k*x). Single precision, matching the particle
// positions in this mixed-precision test; used to bound per-rank sampling.
float CDF(const float& x, const float& alpha, const float& k) {
    return x + (alpha / k) * std::sin(k * x);
}
128
129KOKKOS_FUNCTION
130double PDF(const Vector_t<double, Dim>& xvec, const double& alpha, const Vector_t<double, Dim>& kw,
131 const unsigned Dim) {
132 double pdf = 1.0;
133
134 for (unsigned d = 0; d < Dim; ++d) {
135 pdf *= (1.0 + alpha * Kokkos::cos(kw[d] * xvec[d]));
136 }
137 return pdf;
138}
139
140const char* TestName = "LandauDamping";
141
142int main(int argc, char* argv[]) {
143 ippl::initialize(argc, argv);
144 {
145 Inform msg("LandauDamping");
146 Inform msg2all("LandauDamping", INFORM_ALL_NODES);
147
148 auto start = std::chrono::high_resolution_clock::now();
149 int arg = 1;
150
152 for (unsigned d = 0; d < Dim; d++) {
153 nr[d] = std::atoi(argv[arg++]);
154 }
155
156 static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
157 static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation");
158 static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData");
159 static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity");
160 static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition");
161 static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update");
162 static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup");
163 static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve");
164 static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance");
165
166 IpplTimings::startTimer(mainTimer);
167
168 const size_type totalP = std::atoll(argv[arg++]);
169 const unsigned int nt = std::atoi(argv[arg++]);
170
171 msg << "Landau damping" << endl
172 << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl;
173
174 using bunch_type = ChargedParticles<PLayout_t<float, Dim>, float, Dim>;
175
176 std::unique_ptr<bunch_type> P;
177
178 ippl::NDIndex<Dim> domain;
179 for (unsigned i = 0; i < Dim; i++) {
180 domain[i] = ippl::Index(nr[i]);
181 }
182
183 std::array<bool, Dim> isParallel;
184 isParallel.fill(true);
185
186 // create mesh and layout objects for this problem domain
187 Vector_t<float, Dim> kw = 0.5;
188 float alpha = 0.05;
189 Vector_t<double, Dim> rmin(0.0);
190 Vector_t<double, Dim> rmax = 2 * pi / kw;
191
192 Vector_t<double, Dim> hr = rmax / nr;
193 // Q = -\int\int f dx dv
194 double Q = std::reduce(rmax.begin(), rmax.end(), -1., std::multiplies<double>());
195 Vector_t<double, Dim> origin = rmin;
196 const double dt = 0.5 * hr[0];
197
198 const bool isAllPeriodic = true;
199 Mesh_t<Dim> mesh(domain, hr, origin);
200 FieldLayout_t<Dim> FL(MPI_COMM_WORLD, domain, isParallel, isAllPeriodic);
201 PLayout_t<float, Dim> PL(FL, mesh);
202
203 std::string solver = argv[arg++];
204 P = std::make_unique<bunch_type>(PL, hr, rmin, rmax, isParallel, Q, solver);
205
206 P->nr_m = nr;
207
208 P->initializeFields(mesh, FL);
209
210 P->initSolver();
211 P->time_m = 0.0;
212 P->loadbalancethreshold_m = std::atof(argv[arg++]);
213
214 bool isFirstRepartition;
215
216 if ((P->loadbalancethreshold_m != 1.0) && (ippl::Comm->size() > 1)) {
217 msg << "Starting first repartition" << endl;
218 IpplTimings::startTimer(domainDecomposition);
219 isFirstRepartition = true;
220 const ippl::NDIndex<Dim>& lDom = FL.getLocalNDIndex();
221 const int nghost = P->rho_m.getNghost();
222 auto rhoview = P->rho_m.getView();
223
224 using index_array_type = typename ippl::RangePolicy<Dim>::index_array_type;
226 "Assign initial rho based on PDF", ippl::getRangePolicy(rhoview, nghost),
227 KOKKOS_LAMBDA(const index_array_type& args) {
228 // local to global index conversion
229 Vector_t<double, Dim> xvec = (args + lDom.first() - nghost + 0.5) * hr + origin;
230
231 // ippl::apply accesses the view at the given indices and obtains a
232 // reference; see src/Expression/IpplOperations.h
233 ippl::apply(rhoview, args) = PDF(xvec, alpha, kw, Dim);
234 });
235
236 Kokkos::fence();
237
238 P->initializeORB(FL, mesh);
239 P->repartition(FL, mesh, isFirstRepartition);
240 IpplTimings::stopTimer(domainDecomposition);
241 }
242
243 msg << "First domain decomposition done" << endl;
244 IpplTimings::startTimer(particleCreation);
245
246 typedef ippl::detail::RegionLayout<float, Dim, Mesh_t<Dim>>::uniform_type RegionLayout_t;
247 const RegionLayout_t& RLayout = PL.getRegionLayout();
248 const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions();
249 Vector_t<float, Dim> Nr, Dr, minU, maxU;
250 int myRank = ippl::Comm->rank();
251 float factor = 1;
252 for (unsigned d = 0; d < Dim; ++d) {
253 Nr[d] = CDF(Regions(myRank)[d].max(), alpha, kw[d])
254 - CDF(Regions(myRank)[d].min(), alpha, kw[d]);
255 Dr[d] = CDF(rmax[d], alpha, kw[d]) - CDF(rmin[d], alpha, kw[d]);
256 minU[d] = CDF(Regions(myRank)[d].min(), alpha, kw[d]);
257 maxU[d] = CDF(Regions(myRank)[d].max(), alpha, kw[d]);
258 factor *= Nr[d] / Dr[d];
259 }
260
261 size_type nloc = (size_type)(factor * totalP);
262 size_type Total_particles = 0;
263
264 MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM,
265 ippl::Comm->getCommunicator());
266
267 int rest = (int)(totalP - Total_particles);
268
269 if (ippl::Comm->rank() < rest) {
270 ++nloc;
271 }
272
273 P->create(nloc);
274 Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * ippl::Comm->rank()));
275 Kokkos::parallel_for(
276 nloc, generate_random<Vector_t<float, Dim>, Kokkos::Random_XorShift64_Pool<>, Dim>(
277 P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU));
278
279 Kokkos::fence();
280 ippl::Comm->barrier();
281 IpplTimings::stopTimer(particleCreation);
282
283 P->q = P->Q_m / totalP;
284 msg << "particles created and initial conditions assigned " << endl;
285 isFirstRepartition = false;
286 // The update after the particle creation is not needed as the
287 // particles are generated locally
288
289 IpplTimings::startTimer(DummySolveTimer);
290 P->rho_m = 0.0;
291 P->runSolver();
292 IpplTimings::stopTimer(DummySolveTimer);
293
294 P->scatterCIC(totalP, 0, hr);
295
296 IpplTimings::startTimer(SolveTimer);
297 P->runSolver();
298 IpplTimings::stopTimer(SolveTimer);
299
300 auto Eview = P->getEMirror();
301
302 // gather E field
303 P->gatherCIC();
304
305 IpplTimings::startTimer(dumpDataTimer);
306 P->dumpLandau(Eview);
307 P->gatherStatistics(totalP);
308 // P->dumpLocalDomains(FL, 0);
309 IpplTimings::stopTimer(dumpDataTimer);
310
311 // begin main timestep loop
312 msg << "Starting iterations ..." << endl;
313 for (unsigned int it = 0; it < nt; it++) {
314 // LeapFrog time stepping https://en.wikipedia.org/wiki/Leapfrog_integration
315 // Here, we assume a constant charge-to-mass ratio of -1 for
316 // all the particles hence eliminating the need to store mass as
317 // an attribute
318 // kick
319
321 P->P = P->P - 0.5 * dt * P->E;
323
324 // drift
326 P->R = P->R + dt * P->P;
328
329 // Since the particles have moved spatially update them to correct processors
330 IpplTimings::startTimer(updateTimer);
331 P->update();
332 IpplTimings::stopTimer(updateTimer);
333
334 // Domain Decomposition
335 if (P->balance(totalP, it + 1)) {
336 msg << "Starting repartition" << endl;
337 IpplTimings::startTimer(domainDecomposition);
338 P->repartition(FL, mesh, isFirstRepartition);
339 IpplTimings::stopTimer(domainDecomposition);
340 // IpplTimings::startTimer(dumpDataTimer);
341 // P->dumpLocalDomains(FL, it+1);
342 // IpplTimings::stopTimer(dumpDataTimer);
343 }
344
345 // scatter the charge onto the underlying grid
346 P->scatterCIC(totalP, it + 1, hr);
347
348 // Field solve
349 IpplTimings::startTimer(SolveTimer);
350 P->runSolver();
351 IpplTimings::stopTimer(SolveTimer);
352
353 P->updateEMirror(Eview);
354
355 // gather E field
356 P->gatherCIC();
357
358 // kick
360 P->P = P->P - 0.5 * dt * P->E;
362
363 P->time_m += dt;
364 IpplTimings::startTimer(dumpDataTimer);
365 P->dumpLandau(Eview);
366 P->gatherStatistics(totalP);
367 IpplTimings::stopTimer(dumpDataTimer);
368 msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl;
369 }
370
371 msg << "LandauDamping: End." << endl;
372 IpplTimings::stopTimer(mainTimer);
374 IpplTimings::print(std::string("timing.dat"));
375 auto end = std::chrono::high_resolution_clock::now();
376
377 std::chrono::duration<double> time_chrono =
378 std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
379 std::cout << "Elapsed time: " << time_chrono.count() << std::endl;
380 }
382
383 return 0;
384}
int main(int argc, char *argv[])
float CDF(const float &x, const float &alpha, const float &k)
KOKKOS_FUNCTION double PDF(const Vector_t< double, Dim > &xvec, const double &alpha, const Vector_t< double, Dim > &kw, const unsigned Dim)
constexpr unsigned Dim
const double pi
constexpr unsigned Dim
ippl::detail::size_type size_type
Definition datatypes.h:23
const char * TestName
ippl::FieldLayout< Dim > FieldLayout_t
Definition datatypes.h:21
ippl::Vector< T, Dim > Vector_t
Definition datatypes.h:38
typename ippl::ParticleSpatialLayout< T, Dim, Mesh_t< Dim > > PLayout_t
Definition datatypes.h:15
ippl::UniformCartesian< double, Dim > Mesh_t
Definition datatypes.h:12
Inform & endl(Inform &inf)
Definition Inform.cpp:42
#define INFORM_ALL_NODES
Definition Inform.h:38
void initialize(int &argc, char *argv[], MPI_Comm comm)
Definition Ippl.cpp:16
void finalize()
Definition Ippl.cpp:94
KOKKOS_INLINE_FUNCTION constexpr decltype(auto) apply(const View &view, const Coords &coords)
RangePolicy< View::rank, typenameView::execution_space, PolicyArgs... >::policy_type getRangePolicy(const View &view, int shift=0)
void parallel_for(const std::string &name, const ExecPolicy &policy, const FunctorType &functor)
std::unique_ptr< mpi::Communicator > Comm
Definition Ippl.h:22
const NDIndex_t & getLocalNDIndex() const
KOKKOS_INLINE_FUNCTION Vector< int, Dim > first() const
Definition NDIndex.hpp:170
const host_mirror_type gethLocalRegions() const
KOKKOS_INLINE_FUNCTION constexpr iterator begin()
Definition Vector.hpp:160
KOKKOS_INLINE_FUNCTION constexpr iterator end()
Definition Vector.hpp:165
Kokkos::View< typename NPtr< T, Dim >::type, Properties... > view_type
Definition ViewTypes.h:45
Timing::TimerRef TimerRef
static TimerRef getTimer(const char *nm)
static void stopTimer(TimerRef t)
static void print()
static void startTimer(TimerRef t)
::ippl::Vector< index_type, Dim > index_array_type
KOKKOS_FUNCTION void solve(T &x)
KOKKOS_INLINE_FUNCTION Newton1D(const T &k_, const T &alpha_, const T &u_)
KOKKOS_INLINE_FUNCTION T f(T &x)
KOKKOS_INLINE_FUNCTION ~Newton1D()
KOKKOS_INLINE_FUNCTION Newton1D()
KOKKOS_INLINE_FUNCTION T fprime(T &x)
KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const
typename T::value_type value_type
generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, value_type &alpha_, T &k_, T &minU_, T &maxU_)
typename ippl::detail::ViewType< T, 1 >::view_type view_type