IPPL (Independent Parallel Particle Layer)
IPPL
Loading...
Searching...
No Matches
FFTOpenPoissonSolver.hpp
Go to the documentation of this file.
1//
2// Class FFTOpenPoissonSolver
3// FFT-based Poisson Solver for open boundaries.
4// Solves laplace(phi) = -rho, and E = -grad(phi).
5//
6//
7
8// Communication specific functions (pack and unpack).
9template <typename Tb, typename Tf>
10void pack(const ippl::NDIndex<3> intersect, Kokkos::View<Tf***>& view,
11 ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
13 Kokkos::View<Tb*>& buffer = fd.buffer;
14
15 size_t size = intersect.size();
16 nsends = size;
17 if (buffer.size() < size) {
18 const int overalloc = ippl::Comm->getDefaultOverallocation();
19 Kokkos::realloc(buffer, size * overalloc);
20 }
21
22 const int first0 = intersect[0].first() + nghost - ldom[0].first();
23 const int first1 = intersect[1].first() + nghost - ldom[1].first();
24 const int first2 = intersect[2].first() + nghost - ldom[2].first();
25
26 const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
27 const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
28 const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
29
30 using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
31 // This type casting to long int is necessary as otherwise Kokkos complains for
32 // intel compilers
33 Kokkos::parallel_for(
34 "pack()",
35 mdrange_type({first0, first1, first2}, {(long int)last0, (long int)last1, (long int)last2}),
36 KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
37 const int ig = i - first0;
38 const int jg = j - first1;
39 const int kg = k - first2;
40
41 int l = ig + jg * intersect[0].length()
42 + kg * intersect[1].length() * intersect[0].length();
43
44 Kokkos::complex<Tb> val = view(i, j, k);
45
46 buffer(l) = Kokkos::real(val);
47 });
48 Kokkos::fence();
49}
50
51template <int tensorRank, typename Tb, typename Tf>
52void unpack_impl(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
53 ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
54 size_t dim1 = 0, size_t dim2 = 0, bool x = false, bool y = false, bool z = false) {
55 Kokkos::View<Tb*>& buffer = fd.buffer;
56
57 const int first0 = intersect[0].first() + nghost - ldom[0].first();
58 const int first1 = intersect[1].first() + nghost - ldom[1].first();
59 const int first2 = intersect[2].first() + nghost - ldom[2].first();
60
61 const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
62 const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
63 const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
64
65 using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
66 Kokkos::parallel_for(
67 "pack()", mdrange_type({first0, first1, first2}, {last0, last1, last2}),
68 KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
69 int ig = i - first0;
70 int jg = j - first1;
71 int kg = k - first2;
72
73 ig = x * (intersect[0].length() - 2 * ig - 1) + ig;
74 jg = y * (intersect[1].length() - 2 * jg - 1) + jg;
75 kg = z * (intersect[2].length() - 2 * kg - 1) + kg;
76
77 int l = ig + jg * intersect[0].length()
78 + kg * intersect[1].length() * intersect[0].length();
79
80 ippl::detail::ViewAccess<tensorRank, decltype(view)>::get(view, dim1, dim2, i, j, k) =
81 buffer(l);
82 });
83 Kokkos::fence();
84}
85
86template <typename Tb, typename Tf>
87void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
88 ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
89 bool x = false, bool y = false, bool z = false) {
90 unpack_impl<0, Tb, Tf>(intersect, view, fd, nghost, ldom, 0, 0, x, y, z);
91}
92
93template <typename Tb, typename Tf>
94void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<ippl::Vector<Tf, 3>***>& view,
95 size_t dim1, ippl::detail::FieldBufferData<Tb>& fd, int nghost,
96 const ippl::NDIndex<3> ldom) {
97 unpack_impl<1, Tb, ippl::Vector<Tf, 3>>(intersect, view, fd, nghost, ldom, dim1);
98}
99
100template <typename Tb, typename Tf>
101void unpack(const ippl::NDIndex<3> intersect,
102 const Kokkos::View<ippl::Vector<ippl::Vector<Tf, 3>, 3>***>& view,
103 ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
104 size_t dim1, size_t dim2) {
105 unpack_impl<2, Tb, ippl::Vector<ippl::Vector<Tf, 3>, 3>>(intersect, view, fd, nghost, ldom,
106 dim1, dim2);
107}
108
109template <typename Tb, typename Tf, unsigned Dim>
110void solver_send(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
111 const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
112 ippl::detail::FieldBufferData<Tb>& fd, std::vector<MPI_Request>& requests) {
113 using memory_space = typename Kokkos::View<Tf***>::memory_space;
114
115 requests.resize(requests.size() + 1);
116
118 pack(intersection, view, fd, nghost, ldom, nsends);
119
120 // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
122 ippl::Comm->getBuffer<memory_space, Tf>(nsends);
123
124 int tag = TAG + id;
125
126 ippl::Comm->isend(i, tag, fd, *buf, requests.back(), nsends);
127 buf->resetWritePos();
128}
129
130template <typename Tb, typename Tf, unsigned Dim>
131void solver_recv(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
132 const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
133 ippl::detail::FieldBufferData<Tb>& fd, bool x = false, bool y = false,
134 bool z = false) {
135 using memory_space = typename Kokkos::View<Tf***>::memory_space;
136
138 nrecvs = intersection.size();
139
140 // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
142 ippl::Comm->getBuffer<memory_space, Tf>(nrecvs);
143
144 int tag = TAG + id;
145
146 ippl::Comm->recv(i, tag, fd, *buf, nrecvs * sizeof(Tf), nrecvs);
147 buf->resetReadPos();
148
149 unpack(intersection, view, fd, nghost, ldom, x, y, z);
150}
151
152namespace ippl {
153
155 // constructor and destructor
156 template <typename FieldLHS, typename FieldRHS>
158 : Base()
159 , mesh_mp(nullptr)
160 , layout_mp(nullptr)
161 , mesh2_m(nullptr)
162 , layout2_m(nullptr)
163 , meshComplex_m(nullptr)
164 , layoutComplex_m(nullptr)
165 , mesh4_m(nullptr)
166 , layout4_m(nullptr)
167 , mesh2n1_m(nullptr)
168 , layout2n1_m(nullptr)
169 , isGradFD_m(false) {
171 }
172
173 template <typename FieldLHS, typename FieldRHS>
175 ParameterList& params)
176 : mesh_mp(nullptr)
177 , layout_mp(nullptr)
178 , mesh2_m(nullptr)
179 , layout2_m(nullptr)
180 , meshComplex_m(nullptr)
181 , layoutComplex_m(nullptr)
182 , mesh4_m(nullptr)
183 , layout4_m(nullptr)
184 , mesh2n1_m(nullptr)
185 , layout2n1_m(nullptr)
186 , isGradFD_m(false) {
187 using T = typename FieldLHS::value_type::value_type;
188 static_assert(std::is_floating_point<T>::value, "Not a floating point type");
189
191 this->params_m.merge(params);
192 this->params_m.update("output_type", Base::SOL);
193
194 this->setRhs(rhs);
195 }
196
197 template <typename FieldLHS, typename FieldRHS>
199 ParameterList& params)
200 : mesh_mp(nullptr)
201 , layout_mp(nullptr)
202 , mesh2_m(nullptr)
203 , layout2_m(nullptr)
204 , meshComplex_m(nullptr)
205 , layoutComplex_m(nullptr)
206 , mesh4_m(nullptr)
207 , layout4_m(nullptr)
208 , mesh2n1_m(nullptr)
209 , layout2n1_m(nullptr)
210 , isGradFD_m(false) {
211 using T = typename FieldLHS::value_type::value_type;
212 static_assert(std::is_floating_point<T>::value, "Not a floating point type");
213
215 this->params_m.merge(params);
216
217 this->setLhs(lhs);
218 this->setRhs(rhs);
219 }
220
222 // override setRhs to call class-specific initialization
223 template <typename FieldLHS, typename FieldRHS>
235
237 // allows user to set gradient of phi = Efield instead of spectral
238 // calculation of Efield (which uses FFTs)
239
240 template <typename FieldLHS, typename FieldRHS>
242 // get the output type (sol, grad, or sol & grad)
243 const int out = this->params_m.template get<int>("output_type");
244
245 if (out != Base::SOL_AND_GRAD) {
246 throw IpplException(
247 "FFTOpenPoissonSolver::setGradFD()",
248 "Cannot use gradient for Efield computation unless output type is SOL_AND_GRAD");
249 } else {
250 isGradFD_m = true;
251 }
252 }
253
255 // initializeFields method, called in constructor
256
257 template <typename FieldLHS, typename FieldRHS>
259 // get algorithm and hessian flag from parameter list
260 const int alg = this->params_m.template get<int>("algorithm");
261 const bool hessian = this->params_m.template get<bool>("hessian");
262
263 // first check if valid algorithm choice
264 if ((alg != Algorithm::VICO) && (alg != Algorithm::HOCKNEY)
265 && (alg != Algorithm::BIHARMONIC) && (alg != Algorithm::DCT_VICO)) {
266 throw IpplException("FFTOpenPoissonSolver::initializeFields()",
267 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
268 "supported for open BCs");
269 }
270
271 // get layout and mesh
272 layout_mp = &(this->rhs_mp->getLayout());
273 mesh_mp = &(this->rhs_mp->get_mesh());
274 mpi::Communicator comm = layout_mp->comm;
275
276 // get mesh spacing and origin
277 hr_m = mesh_mp->getMeshSpacing();
278 vector_type origin = mesh_mp->getOrigin();
279
280 // create domain for the real fields
281 domain_m = layout_mp->getDomain();
282
283 // get the mesh spacings and sizes for each dimension
284 for (unsigned int i = 0; i < Dim; ++i) {
285 nr_m[i] = domain_m[i].length();
286
287 // create the doubled domain for the FFT procedure
288 domain2_m[i] = Index(2 * nr_m[i]);
289 }
290
291 // define decomposition (parallel / serial)
292 std::array<bool, Dim> isParallel = layout_mp->isParallel();
293
294 // create double sized mesh and layout objects using the previously defined domain2_m
295 using mesh_type = typename lhs_type::Mesh_t;
296 mesh2_m = std::unique_ptr<mesh_type>(new mesh_type(domain2_m, hr_m, origin));
297 layout2_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2_m, isParallel));
298
299 // create the domain for the transformed (complex) fields
300 // since we use HeFFTe for the transforms it doesn't require permuting to the right
301 // one of the dimensions has only (n/2 +1) as our original fields are fully real
302 // the dimension is given by the user via r2c_direction
303 unsigned int RCDirection = this->params_m.template get<int>("r2c_direction");
304 for (unsigned int i = 0; i < Dim; ++i) {
305 if (i == RCDirection) {
306 domainComplex_m[RCDirection] = Index(nr_m[RCDirection] + 1);
307 } else {
308 domainComplex_m[i] = Index(2 * nr_m[i]);
309 }
310 }
311
312 // create mesh and layout for the real to complex FFT transformed fields
313 meshComplex_m = std::unique_ptr<mesh_type>(new mesh_type(domainComplex_m, hr_m, origin));
315 std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domainComplex_m, isParallel));
316
317 // initialize fields
318 storage_field.initialize(*mesh2_m, *layout2_m);
321
322 int out = this->params_m.template get<int>("output_type");
323 if (((out == Base::GRAD || out == Base::SOL_AND_GRAD) && !isGradFD_m) || hessian) {
324 temp_m.initialize(*meshComplex_m, *layoutComplex_m);
325 }
326
327 if (hessian) {
328 hess_m.initialize(*mesh_mp, *layout_mp);
329 }
330
331 // create the FFT object
332 fft_m = std::make_unique<FFT_t>(*layout2_m, *layoutComplex_m, this->params_m);
333
334 // if Vico, also need to create mesh and layout for 4N Fourier domain
335 // on this domain, the truncated Green's function is defined
336 // also need to create the 4N complex grid, on which precomputation step done
337 if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
338 // start a timer
339 static IpplTimings::TimerRef initialize_vico =
340 IpplTimings::getTimer("Initialize: extra Vico");
341 IpplTimings::startTimer(initialize_vico);
342
343 for (unsigned int i = 0; i < Dim; ++i) {
344 domain4_m[i] = Index(4 * nr_m[i]);
345 }
346
347 // 4N grid
348 using mesh_type = typename lhs_type::Mesh_t;
349 mesh4_m = std::unique_ptr<mesh_type>(new mesh_type(domain4_m, hr_m, origin));
350 layout4_m =
351 std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain4_m, isParallel));
352
353 // initialize fields
354 grnL_m.initialize(*mesh4_m, *layout4_m);
355
356 // create a Complex-to-Complex FFT object to transform for layout4
357 fft4n_m = std::make_unique<FFT<CCTransform, CxField_gt>>(*layout4_m, this->params_m);
358
359 IpplTimings::stopTimer(initialize_vico);
360 }
361
362 // if DCT_VICO, need 2N+1 mesh, layout, domain, and green's function field for
363 // precomputation
364 if (alg == Algorithm::DCT_VICO) {
365 // start a timer
366 static IpplTimings::TimerRef initialize_vico =
367 IpplTimings::getTimer("Initialize: extra Vico");
368 IpplTimings::startTimer(initialize_vico);
369
370 // 2N+1 domain for DCT
371 for (unsigned int i = 0; i < Dim; ++i) {
372 domain2n1_m[i] = Index(2 * nr_m[i] + 1);
373 }
374
375 // 2N+1 grid
376 mesh2n1_m = std::unique_ptr<mesh_type>(new mesh_type(domain2n1_m, hr_m, origin));
378 std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2n1_m, isParallel));
379
380 // initialize fields
381 grn2n1_m.initialize(*mesh2n1_m, *layout2n1_m); // 2N+1 grnL
382
383 // create real to real FFT object for 2N+1 grid
384 fft2n1_m = std::make_unique<FFT<Cos1Transform, Field_t>>(*layout2n1_m, this->params_m);
385
386 IpplTimings::stopTimer(initialize_vico);
387 }
388
389 // these are fields that are used for calculating the Green's function for Hockney
390 if (alg == Algorithm::HOCKNEY) {
391 // start a timer
392 static IpplTimings::TimerRef initialize_hockney =
393 IpplTimings::getTimer("Initialize: extra Hockney");
394 IpplTimings::startTimer(initialize_hockney);
395
396 for (unsigned int d = 0; d < Dim; ++d) {
397 grnIField_m[d].initialize(*mesh2_m, *layout2_m);
398
399 // get number of ghost points and the Kokkos view to iterate over field
400 auto view = grnIField_m[d].getView();
401 const int nghost = grnIField_m[d].getNghost();
402 const auto& ldom = layout2_m->getLocalNDIndex();
403
404 // the length of the physical domain
405 const int size = nr_m[d];
406
407 // Kokkos parallel for loop to initialize grnIField[d]
408 switch (d) {
409 case 0:
410 Kokkos::parallel_for(
411 "Helper index Green field initialization",
412 grnIField_m[d].getFieldRangePolicy(),
413 KOKKOS_LAMBDA(const int i, const int j, const int k) {
414 // go from local indices to global
415 const int ig = i + ldom[0].first() - nghost;
416 const int jg = j + ldom[1].first() - nghost;
417 const int kg = k + ldom[2].first() - nghost;
418
419 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
420 const bool outsideN = (ig >= size);
421 view(i, j, k) =
422 (2 * size * outsideN - ig) * (2 * size * outsideN - ig);
423
424 // add 1.0 if at (0,0,0) to avoid singularity
425 const bool isOrig = ((ig == 0) && (jg == 0) && (kg == 0));
426 view(i, j, k) += isOrig * 1.0;
427 });
428 break;
429 case 1:
430 Kokkos::parallel_for(
431 "Helper index Green field initialization",
432 grnIField_m[d].getFieldRangePolicy(),
433 KOKKOS_LAMBDA(const int i, const int j, const int k) {
434 // go from local indices to global
435 const int jg = j + ldom[1].first() - nghost;
436
437 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
438 const bool outsideN = (jg >= size);
439 view(i, j, k) =
440 (2 * size * outsideN - jg) * (2 * size * outsideN - jg);
441 });
442 break;
443 case 2:
444 Kokkos::parallel_for(
445 "Helper index Green field initialization",
446 grnIField_m[d].getFieldRangePolicy(),
447 KOKKOS_LAMBDA(const int i, const int j, const int k) {
448 // go from local indices to global
449 const int kg = k + ldom[2].first() - nghost;
450
451 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
452 const bool outsideN = (kg >= size);
453 view(i, j, k) =
454 (2 * size * outsideN - kg) * (2 * size * outsideN - kg);
455 });
456 break;
457 }
458 }
459 IpplTimings::stopTimer(initialize_hockney);
460 }
461
462 static IpplTimings::TimerRef warmup = IpplTimings::getTimer("Warmup");
464
465 // "empty" transforms to warmup all the FFTs
466 fft_m->warmup(rho2_mr, rho2tr_m);
467 if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
468 fft4n_m->warmup(grnL_m);
469 }
470 if (alg == Algorithm::DCT_VICO) {
471 fft2n1_m->warmup(grn2n1_m);
472 }
473
475
476 rho2_mr = 0.0;
477 rho2tr_m = 0.0;
478 grnL_m = 0.0;
479 grn2n1_m = 0.0;
480
481 // call greensFunction and we will get the transformed G in the class attribute
482 // this is done in initialization so that we already have the precomputed fct
483 // for all timesteps (green's fct will only change if mesh size changes)
484 static IpplTimings::TimerRef ginit = IpplTimings::getTimer("Green Init");
488 };
489
491 // compute electric potential by solving Poisson's eq given a field rho and mesh spacings hr
492 template <typename FieldLHS, typename FieldRHS>
494 // start a timer
497
498 // get the output type (sol, grad, or sol & grad)
499 const int out = this->params_m.template get<int>("output_type");
500
501 // get the algorithm (hockney, vico, or biharmonic)
502 const int alg = this->params_m.template get<int>("algorithm");
503
504 // get hessian flag (if true, we compute the Hessian)
505 const bool hessian = this->params_m.template get<bool>("hessian");
506
507 // set the mesh & spacing, which may change each timestep
508 mesh_mp = &(this->rhs_mp->get_mesh());
509
510 // check whether the mesh spacing has changed with respect to the old one
511 // if yes, update and set green flag to true
512 bool green = false;
513 for (unsigned int i = 0; i < Dim; ++i) {
514 if (hr_m[i] != mesh_mp->getMeshSpacing(i)) {
515 hr_m[i] = mesh_mp->getMeshSpacing(i);
516 green = true;
517 }
518 }
519
520 // set mesh spacing on the other grids again
521 mesh2_m->setMeshSpacing(hr_m);
522 meshComplex_m->setMeshSpacing(hr_m);
523
524 // field object on the doubled grid; zero-padded
525 rho2_mr = 0.0;
526
527 // start a timer
528 static IpplTimings::TimerRef stod = IpplTimings::getTimer("Solve: Physical to double");
530
531 // store rho (RHS) in the lower left quadrant of the doubled grid
532 // with or without communication (if only 1 rank)
533
534 const int ranks = Comm->size();
535
536 auto view2 = rho2_mr.getView();
537 auto view1 = this->rhs_mp->getView();
538
539 const int nghost2 = rho2_mr.getNghost();
540 const int nghost1 = this->rhs_mp->getNghost();
541
542 const auto& ldom2 = layout2_m->getLocalNDIndex();
543 const auto& ldom1 = layout_mp->getLocalNDIndex();
544
545 if (ranks > 1) {
546 // COMMUNICATION
547 const auto& lDomains2 = layout2_m->getHostLocalDomains();
548
549 // send
550 std::vector<MPI_Request> requests(0);
551
552 for (int i = 0; i < ranks; ++i) {
553 if (lDomains2[i].touches(ldom1)) {
554 auto intersection = lDomains2[i].intersect(ldom1);
555
556 solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom1, nghost1, view1,
557 fd_m, requests);
558 }
559 }
560
561 // receive
562 const auto& lDomains1 = layout_mp->getHostLocalDomains();
563
564 for (int i = 0; i < ranks; ++i) {
565 if (lDomains1[i].touches(ldom2)) {
566 auto intersection = lDomains1[i].intersect(ldom2);
567
569 nrecvs = intersection.size();
570
571 buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
572
573 Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs), nrecvs);
574 buf->resetReadPos();
575
576 unpack(intersection, view2, fd_m, nghost2, ldom2);
577 }
578 }
579
580 // wait for all messages to be received
581 if (requests.size() > 0) {
582 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
583 }
584 ippl::Comm->freeAllBuffers();
585
586 } else {
587 Kokkos::parallel_for(
588 "Write rho on the doubled grid", this->rhs_mp->getFieldRangePolicy(),
589 KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
590 const size_t ig2 = i + ldom2[0].first() - nghost2;
591 const size_t jg2 = j + ldom2[1].first() - nghost2;
592 const size_t kg2 = k + ldom2[2].first() - nghost2;
593
594 const size_t ig1 = i + ldom1[0].first() - nghost1;
595 const size_t jg1 = j + ldom1[1].first() - nghost1;
596 const size_t kg1 = k + ldom1[2].first() - nghost1;
597
598 // write physical rho on [0,N-1] of doubled field
599 const bool isQuadrant1 = ((ig1 == ig2) && (jg1 == jg2) && (kg1 == kg2));
600 view2(i, j, k) = view1(i, j, k) * isQuadrant1;
601 });
602 }
603
605
606 // start a timer
607 static IpplTimings::TimerRef fftrho = IpplTimings::getTimer("FFT: Rho");
609
610 // forward FFT of the charge density field on doubled grid
611 fft_m->transform(FORWARD, rho2_mr, rho2tr_m);
612
614
615 // call greensFunction to recompute if the mesh spacing has changed
616 if (green) {
618 }
619
620 // multiply FFT(rho2)*FFT(green)
621 // convolution becomes multiplication in FFT
622 // minus sign since we are solving laplace(phi) = -rho
624
625 // if output_type is SOL or SOL_AND_GRAD, we caculate solution
626 if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) {
627 // start a timer
628 static IpplTimings::TimerRef fftc = IpplTimings::getTimer("FFT: Convolution");
630
631 // inverse FFT of the product and store the electrostatic potential in rho2_mr
632 fft_m->transform(BACKWARD, rho2_mr, rho2tr_m);
633
635 // Hockney: multiply the rho2_mr field by the total number of points to account for
636 // double counting (rho and green) of normalization factor in forward transform
637 // also multiply by the mesh spacing^3 (to account for discretization)
638 // Vico: need to multiply by normalization factor of 1/4N^3,
639 // since only backward transform was performed on the 4N grid
640 // DCT_VICO: need to multiply by a factor of (2N)^3 to match the normalization factor in
641 // the transform.
642 for (unsigned int i = 0; i < Dim; ++i) {
643 switch (alg) {
645 rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
646 break;
647 case Algorithm::VICO:
649 rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
650 break;
652 rho2_mr = rho2_mr * (1.0 / 4.0);
653 break;
654 default:
655 throw IpplException(
656 "FFTOpenPoissonSolver::initializeFields()",
657 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
658 "supported for open BCs");
659 }
660 }
661
662 // start a timer
663 static IpplTimings::TimerRef dtos = IpplTimings::getTimer("Solve: Double to physical");
665
666 // get the physical part only --> physical electrostatic potential is now given in RHS
667 // need communication if more than one rank
668
669 if (ranks > 1) {
670 // COMMUNICATION
671
672 // send
673 const auto& lDomains1 = layout_mp->getHostLocalDomains();
674
675 std::vector<MPI_Request> requests(0);
676
677 for (int i = 0; i < ranks; ++i) {
678 if (lDomains1[i].touches(ldom2)) {
679 auto intersection = lDomains1[i].intersect(ldom2);
680
681 solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
682 view2, fd_m, requests);
683 }
684 }
685
686 // receive
687 const auto& lDomains2 = layout2_m->getHostLocalDomains();
688
689 for (int i = 0; i < ranks; ++i) {
690 if (ldom1.touches(lDomains2[i])) {
691 auto intersection = ldom1.intersect(lDomains2[i]);
692
694 nrecvs = intersection.size();
695
696 buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
697
698 Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
699 nrecvs);
700 buf->resetReadPos();
701
702 unpack(intersection, view1, fd_m, nghost1, ldom1);
703 }
704 }
705
706 // wait for all messages to be received
707 if (requests.size() > 0) {
708 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
709 }
710 ippl::Comm->freeAllBuffers();
711
712 } else {
713 Kokkos::parallel_for(
714 "Write the solution into the LHS on physical grid",
715 this->rhs_mp->getFieldRangePolicy(),
716 KOKKOS_LAMBDA(const int i, const int j, const int k) {
717 const int ig2 = i + ldom2[0].first() - nghost2;
718 const int jg2 = j + ldom2[1].first() - nghost2;
719 const int kg2 = k + ldom2[2].first() - nghost2;
720
721 const int ig = i + ldom1[0].first() - nghost1;
722 const int jg = j + ldom1[1].first() - nghost1;
723 const int kg = k + ldom1[2].first() - nghost1;
724
725 // take [0,N-1] as physical solution
726 const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
727 view1(i, j, k) = view2(i, j, k) * isQuadrant1;
728 });
729 }
731 }
732
733 // if we want finite differences Efield = -grad(phi)
734 // instead of computing it in Fourier domain
735 // this is only possible if SOL_AND_GRAD is the output type
736 if (isGradFD_m && (out == Base::SOL_AND_GRAD)) {
737 *(this->lhs_mp) = -grad(*this->rhs_mp);
738 }
739
740 // if output_type is GRAD or SOL_AND_GRAD, we calculate E-field (gradient in Fourier domain)
741 if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) {
742 // start a timer
743 static IpplTimings::TimerRef efield = IpplTimings::getTimer("Solve: Electric field");
745
746 // get E field view (LHS)
747 auto viewL = this->lhs_mp->getView();
748 const int nghostL = this->lhs_mp->getNghost();
749
750 // get rho2tr_m view (as we want to multiply by ik then transform)
751 auto viewR = rho2tr_m.getView();
752 const int nghostR = rho2tr_m.getNghost();
753 const auto& ldomR = layoutComplex_m->getLocalNDIndex();
754
755 // use temp_m as a temporary complex field
756 auto view_g = temp_m.getView();
757
758 // define some constants
759 const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
760 const Kokkos::complex<Trhs> I = {0.0, 1.0};
761
762 // define some member variables in local scope for the parallel_for
763 vector_type hsize = hr_m;
765
766 // loop over each component (E = vector field)
767 for (size_t gd = 0; gd < Dim; ++gd) {
768 // loop over rho2tr_m to multiply by -ik (gradient in Fourier space)
769 Kokkos::parallel_for(
770 "Gradient - E field", rho2tr_m.getFieldRangePolicy(),
771 KOKKOS_LAMBDA(const int i, const int j, const int k) {
772 // global indices for 2N rhotr_m
773 const int ig = i + ldomR[0].first() - nghostR;
774 const int jg = j + ldomR[1].first() - nghostR;
775 const int kg = k + ldomR[2].first() - nghostR;
776
777 Vector<int, 3> iVec = {ig, jg, kg};
778
779 scalar_type k_gd;
780 const scalar_type Len = N[gd] * hsize[gd];
781 const bool shift = (iVec[gd] > N[gd]);
782 const bool notMid = (iVec[gd] != N[gd]);
783
784 k_gd = notMid * (pi / Len) * (iVec[gd] - shift * 2 * N[gd]);
785
786 view_g(i, j, k) = -(I * k_gd) * viewR(i, j, k);
787 });
788
789 // start a timer
790 static IpplTimings::TimerRef ffte = IpplTimings::getTimer("FFT: Efield");
792
793 // transform to get E-field
794 fft_m->transform(BACKWARD, rho2_mr, temp_m);
795
797
798 // apply proper normalization
799 for (unsigned int i = 0; i < Dim; ++i) {
800 switch (alg) {
802 rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
803 break;
804 case Algorithm::VICO:
806 rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
807 break;
809 rho2_mr = rho2_mr * (1.0 / 4.0);
810 break;
811 default:
812 throw IpplException(
813 "FFTOpenPoissonSolver::initializeFields()",
814 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
815 "supported for open BCs");
816 }
817 }
818
819 // start a timer
820 static IpplTimings::TimerRef edtos =
821 IpplTimings::getTimer("Efield: double to phys.");
823
824 // restrict to physical grid (N^3) and assign to LHS (E-field)
825 // communication needed if more than one rank
826 if (ranks > 1) {
827 // COMMUNICATION
828
829 // send
830 const auto& lDomains1 = layout_mp->getHostLocalDomains();
831 std::vector<MPI_Request> requests(0);
832
833 for (int i = 0; i < ranks; ++i) {
834 if (lDomains1[i].touches(ldom2)) {
835 auto intersection = lDomains1[i].intersect(ldom2);
836
837 solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
838 view2, fd_m, requests);
839 }
840 }
841
842 // receive
843 const auto& lDomains2 = layout2_m->getHostLocalDomains();
844
845 for (int i = 0; i < ranks; ++i) {
846 if (ldom1.touches(lDomains2[i])) {
847 auto intersection = ldom1.intersect(lDomains2[i]);
848
850 nrecvs = intersection.size();
851
852 buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
853
854 Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
855 nrecvs);
856 buf->resetReadPos();
857
858 unpack(intersection, viewL, gd, fd_m, nghostL, ldom1);
859 }
860 }
861
862 // wait for all messages to be received
863 if (requests.size() > 0) {
864 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
865 }
866 ippl::Comm->freeAllBuffers();
867
868 } else {
869 Kokkos::parallel_for(
870 "Write the E-field on physical grid", this->lhs_mp->getFieldRangePolicy(),
871 KOKKOS_LAMBDA(const int i, const int j, const int k) {
872 const int ig2 = i + ldom2[0].first() - nghost2;
873 const int jg2 = j + ldom2[1].first() - nghost2;
874 const int kg2 = k + ldom2[2].first() - nghost2;
875
876 const int ig = i + ldom1[0].first() - nghostL;
877 const int jg = j + ldom1[1].first() - nghostL;
878 const int kg = k + ldom1[2].first() - nghostL;
879
880 // take [0,N-1] as physical solution
881 const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
882 viewL(i, j, k)[gd] = view2(i, j, k) * isQuadrant1;
883 });
884 }
886 }
888 }
889
890 // if user asked for Hessian, compute in Fourier domain (-k^2 multiplication)
891 if (hessian) {
892 // start a timer
893 static IpplTimings::TimerRef hess = IpplTimings::getTimer("Solve: Hessian");
895
896 // get Hessian matrix view (LHS)
897 auto viewH = hess_m.getView();
898 const int nghostH = hess_m.getNghost();
899
900 // get rho2tr_m view (as we want to multiply by -k^2 then transform)
901 auto viewR = rho2tr_m.getView();
902 const int nghostR = rho2tr_m.getNghost();
903 const auto& ldomR = layoutComplex_m->getLocalNDIndex();
904
905 // use temp_m as a temporary complex field
906 auto view_g = temp_m.getView();
907
908 // define some constants
909 const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
910
911 // define some member variables in local scope for the parallel_for
912 vector_type hsize = hr_m;
914
915 // loop over each component (Hessian = Matrix field)
916 for (size_t row = 0; row < Dim; ++row) {
917 for (size_t col = 0; col < Dim; ++col) {
918 // loop over rho2tr_m to multiply by -k^2 (second derivative in Fourier space)
919 // if diagonal element (row = col), do not need N/2 term = 0
920 // else, if mixed derivative, need kVec = 0 at N/2
921
922 Kokkos::parallel_for(
923 "Hessian", rho2tr_m.getFieldRangePolicy(),
924 KOKKOS_LAMBDA(const int i, const int j, const int k) {
925 // global indices for 2N rhotr_m
926 const int ig = i + ldomR[0].first() - nghostR;
927 const int jg = j + ldomR[1].first() - nghostR;
928 const int kg = k + ldomR[2].first() - nghostR;
929
930 Vector<int, 3> iVec = {ig, jg, kg};
931 Vector_t kVec;
932
933 for (size_t d = 0; d < Dim; ++d) {
934 const scalar_type Len = N[d] * hsize[d];
935 const bool shift = (iVec[d] > N[d]);
936 const bool isMid = (iVec[d] == N[d]);
937 const bool notDiag = (row != col);
938
939 kVec[d] = (1 - (notDiag * isMid)) * (pi / Len)
940 * (iVec[d] - shift * 2 * N[d]);
941 }
942
943 view_g(i, j, k) = -(kVec[col] * kVec[row]) * viewR(i, j, k);
944 });
945
946 // start a timer
947 static IpplTimings::TimerRef ffth = IpplTimings::getTimer("FFT: Hessian");
949
950 // transform to get Hessian
951 fft_m->transform(BACKWARD, rho2_mr, temp_m);
952
954
955 // apply proper normalization
956 for (unsigned int i = 0; i < Dim; ++i) {
957 switch (alg) {
959 rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
960 break;
961 case Algorithm::VICO:
963 rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
964 break;
966 rho2_mr = rho2_mr * (1.0 / 4.0);
967 break;
968 default:
969 throw IpplException(
970 "FFTOpenPoissonSolver::initializeFields()",
971 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
972 "supported for open BCs");
973 }
974 }
975
976 // start a timer
977 static IpplTimings::TimerRef hdtos =
978 IpplTimings::getTimer("Hessian: double to phys.");
980
981 // restrict to physical grid (N^3) and assign to Matrix field (Hessian)
982 // communication needed if more than one rank
983 if (ranks > 1) {
984 // COMMUNICATION
985
986 // send
987 const auto& lDomains1 = layout_mp->getHostLocalDomains();
988 std::vector<MPI_Request> requests(0);
989
990 for (int i = 0; i < ranks; ++i) {
991 if (lDomains1[i].touches(ldom2)) {
992 auto intersection = lDomains1[i].intersect(ldom2);
993
994 solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2,
995 nghost2, view2, fd_m, requests);
996 }
997 }
998
999 // receive
1000 const auto& lDomains2 = layout2_m->getHostLocalDomains();
1001
1002 for (int i = 0; i < ranks; ++i) {
1003 if (ldom1.touches(lDomains2[i])) {
1004 auto intersection = ldom1.intersect(lDomains2[i]);
1005
1007 nrecvs = intersection.size();
1008
1009 buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
1010
1011 Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf,
1012 nrecvs * sizeof(Trhs), nrecvs);
1013 buf->resetReadPos();
1014
1015 unpack(intersection, viewH, fd_m, nghostH, ldom1, row, col);
1016 }
1017 }
1018
1019 // wait for all messages to be received
1020 if (requests.size() > 0) {
1021 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
1022 }
1023 ippl::Comm->freeAllBuffers();
1024
1025 } else {
1026 Kokkos::parallel_for(
1027 "Write Hessian on physical grid", hess_m.getFieldRangePolicy(),
1028 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1029 const int ig2 = i + ldom2[0].first() - nghost2;
1030 const int jg2 = j + ldom2[1].first() - nghost2;
1031 const int kg2 = k + ldom2[2].first() - nghost2;
1032
1033 const int ig = i + ldom1[0].first() - nghostH;
1034 const int jg = j + ldom1[1].first() - nghostH;
1035 const int kg = k + ldom1[2].first() - nghostH;
1036
1037 // take [0,N-1] as physical solution
1038 const bool isQuadrant1 =
1039 ((ig == ig2) && (jg == jg2) && (kg == kg2));
1040 viewH(i, j, k)[row][col] = view2(i, j, k) * isQuadrant1;
1041 });
1042 }
1044 }
1045 }
1047 }
1049 };
1050
1052 // calculate FFT of the Green's function
1053
1054 template <typename FieldLHS, typename FieldRHS>
1056 const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
1057 grn_mr = 0.0;
1058
1059 const int alg = this->params_m.template get<int>("algorithm");
1060
1061 if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
1062 Vector_t l(hr_m * nr_m);
1063 Vector_t hs_m;
1064 double L_sum(0.0);
1065
1066 // compute length of the physical domain
1067 // compute Fourier domain spacing
1068 for (unsigned int i = 0; i < Dim; ++i) {
1069 hs_m[i] = pi * 0.5 / l[i];
1070 L_sum = L_sum + l[i] * l[i];
1071 }
1072
1073 // define the origin of the 4N grid
1074 vector_type origin;
1075
1076 for (unsigned int i = 0; i < Dim; ++i) {
1077 origin[i] = -2 * nr_m[i] * pi / l[i];
1078 }
1079
1080 // set mesh for the 4N mesh
1081 mesh4_m->setMeshSpacing(hs_m);
1082
1083 // size of truncation window
1084 L_sum = std::sqrt(L_sum);
1085 // we choose a window 10% larger than domain (arbitrary choice)
1086 L_sum = 1.1 * L_sum;
1087
1088 // initialize grnL_m
1089 typename CxField_gt::view_type view_g = grnL_m.getView();
1090 const int nghost_g = grnL_m.getNghost();
1091 const auto& ldom_g = layout4_m->getLocalNDIndex();
1092
1093 Vector<int, Dim> size = nr_m;
1094
1095 // Kokkos parallel for loop to assign analytic grnL_m
1096 if (alg == Algorithm::VICO) {
1097 Kokkos::parallel_for(
1098 "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
1099 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1100 // go from local indices to global
1101 const int ig = i + ldom_g[0].first() - nghost_g;
1102 const int jg = j + ldom_g[1].first() - nghost_g;
1103 const int kg = k + ldom_g[2].first() - nghost_g;
1104
1105 bool isOutside = (ig > 2 * size[0] - 1);
1106 const Tg t = ig * hs_m[0] + isOutside * origin[0];
1107
1108 isOutside = (jg > 2 * size[1] - 1);
1109 const Tg u = jg * hs_m[1] + isOutside * origin[1];
1110
1111 isOutside = (kg > 2 * size[2] - 1);
1112 const Tg v = kg * hs_m[2] + isOutside * origin[2];
1113
1114 Tg s = (t * t) + (u * u) + (v * v);
1115 s = Kokkos::sqrt(s);
1116
1117 // assign the green's function value
1118 // if (0,0,0), assign L^2/2 (analytical limit of sinc)
1119
1120 const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1121 const Tg analyticLim = -L_sum * L_sum * 0.5;
1122 const Tg value = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
1123 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
1124
1125 view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
1126 });
1127 } else if (alg == Algorithm::BIHARMONIC) {
1128 Kokkos::parallel_for(
1129 "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
1130 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1131 // go from local indices to global
1132 const int ig = i + ldom_g[0].first() - nghost_g;
1133 const int jg = j + ldom_g[1].first() - nghost_g;
1134 const int kg = k + ldom_g[2].first() - nghost_g;
1135
1136 bool isOutside = (ig > 2 * size[0] - 1);
1137 const Tg t = ig * hs_m[0] + isOutside * origin[0];
1138
1139 isOutside = (jg > 2 * size[1] - 1);
1140 const Tg u = jg * hs_m[1] + isOutside * origin[1];
1141
1142 isOutside = (kg > 2 * size[2] - 1);
1143 const Tg v = kg * hs_m[2] + isOutside * origin[2];
1144
1145 Tg s = (t * t) + (u * u) + (v * v);
1146 s = Kokkos::sqrt(s);
1147
1148 // assign value and replace with analytic limit at origin (0,0,0)
1149 const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1150 const Tg analyticLim = -L_sum * L_sum * L_sum * L_sum / 8.0;
1151 const Tg value = -((2 - (L_sum * L_sum * s * s)) * Kokkos::cos(L_sum * s)
1152 + 2 * L_sum * s * Kokkos::sin(L_sum * s) - 2)
1153 / (2 * s * s * s * s + isOrig * 1.0);
1154
1155 view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
1156 });
1157 }
1158
1159 // start a timer
1160 static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
1162
1163 // inverse Fourier transform of the green's function for precomputation
1164 fft4n_m->transform(BACKWARD, grnL_m);
1165
1167
1168 // Restrict transformed grnL_m to 2N domain after precomputation step
1169
1170 // get the field data first
1171 typename Field_t::view_type view = grn_mr.getView();
1172 const int nghost = grn_mr.getNghost();
1173 const auto& ldom = layout2_m->getLocalNDIndex();
1174
1175 // start a timer
1176 static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
1177 IpplTimings::startTimer(ifftshift);
1178
1179 // get number of ranks to see if need communication
1180 const int ranks = Comm->size();
1181
1182 if (ranks > 1) {
1183 communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost);
1184 } else {
1185 // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid
1186 using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
1187 Kokkos::parallel_for(
1188 "Restrict domain of Green's function from 4N to 2N",
1189 mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
1190 view.extent(1) - nghost - size[1],
1191 view.extent(2) - nghost - size[2]}),
1192 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1193 // go from local indices to global
1194 const int ig = i + ldom[0].first() - nghost;
1195 const int jg = j + ldom[1].first() - nghost;
1196 const int kg = k + ldom[2].first() - nghost;
1197
1198 const int ig2 = i + ldom_g[0].first() - nghost_g;
1199 const int jg2 = j + ldom_g[1].first() - nghost_g;
1200 const int kg2 = k + ldom_g[2].first() - nghost_g;
1201
1202 const bool isOrig = (ig == ig2) && (jg == jg2) && (kg == kg2);
1203 view(i, j, k) = isOrig * real(view_g(i, j, k));
1204
1205 // Now fill the rest of the field
1206 const int s = 2 * size[0] - ig - 1 - ldom_g[0].first() + nghost_g;
1207 const int p = 2 * size[1] - jg - 1 - ldom_g[1].first() + nghost_g;
1208 const int q = 2 * size[2] - kg - 1 - ldom_g[2].first() + nghost_g;
1209
1210 view(s, j, k) = real(view_g(i + 1, j, k));
1211 view(i, p, k) = real(view_g(i, j + 1, k));
1212 view(i, j, q) = real(view_g(i, j, k + 1));
1213 view(s, j, q) = real(view_g(i + 1, j, k + 1));
1214 view(s, p, k) = real(view_g(i + 1, j + 1, k));
1215 view(i, p, q) = real(view_g(i, j + 1, k + 1));
1216 view(s, p, q) = real(view_g(i + 1, j + 1, k + 1));
1217 });
1218 }
1219 IpplTimings::stopTimer(ifftshift);
1220
1221 } else if (alg == Algorithm::DCT_VICO) {
1222 Vector_t l(hr_m * nr_m);
1223 Vector_t hs_m;
1224 double L_sum(0.0);
1225
1226 // compute length of the physical domain
1227 // compute Fourier domain spacing
1228 for (unsigned int i = 0; i < Dim; ++i) {
1229 hs_m[i] = Kokkos::numbers::pi_v<Trhs> * 0.5 / l[i];
1230 L_sum = L_sum + l[i] * l[i];
1231 }
1232
1233 // define the origin of the 2N+1 grid
1234 Vector_t origin = 0.0;
1235
1236 // set mesh for the 2N+1 mesh
1237 mesh2n1_m->setMeshSpacing(hs_m);
1238
1239 // size of truncation window
1240 L_sum = Kokkos::sqrt(L_sum);
1241 L_sum = 1.1 * L_sum;
1242
1243 // initialize grn2n1_m
1244 Vector<int, Dim> size = nr_m;
1245
1246 // initialize grn2n1_m
1247 typename Field_t::view_type view_g2n1 = grn2n1_m.getView();
1248 const int nghost_g2n1 = grn2n1_m.getNghost();
1249 const auto& ldom_g2n1 = layout2n1_m->getLocalNDIndex();
1250
1251 Kokkos::parallel_for(
1252 "Initialize 2N+1 Green's function ", grn2n1_m.getFieldRangePolicy(),
1253 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1254 // go from local indices to global
1255 const int ig = i + ldom_g2n1[0].first() - nghost_g2n1;
1256 const int jg = j + ldom_g2n1[1].first() - nghost_g2n1;
1257 const int kg = k + ldom_g2n1[2].first() - nghost_g2n1;
1258
1259 double t = ig * hs_m[0];
1260 double u = jg * hs_m[1];
1261 double v = kg * hs_m[2];
1262
1263 double s = (t * t) + (u * u) + (v * v);
1264 s = Kokkos::sqrt(s);
1265
1266 const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1267 const double val = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
1268 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
1269 const double analyticLim = -L_sum * L_sum * 0.5;
1270
1271 view_g2n1(i, j, k) = ((!isOrig) * val) + (isOrig * analyticLim);
1272 });
1273
1274 // start a timer
1275 static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
1277
1278 // inverse DCT transform of 2N+1 green's function for the precomputation
1279 fft2n1_m->transform(BACKWARD, grn2n1_m);
1280
1282
1283 // Restrict transformed grn2n1_m to 2N domain after precomputation step
1284
1285 // get the field data first
1286 typename Field_t::view_type view = grn_mr.getView();
1287 const int nghost = grn_mr.getNghost();
1288 const auto& ldom = layout2_m->getLocalNDIndex();
1289
1290 // start a timer
1291 static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
1292 IpplTimings::startTimer(ifftshift);
1293
1294 // get number of ranks to see if need communication
1295 int ranks = ippl::Comm->size();
1296
1297 // range type for Kokkos loop
1298 using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
1299
1300 if (ranks > 1) {
1301 communicateVico(size, view_g2n1, ldom_g2n1, nghost_g2n1, view, ldom, nghost);
1302 } else {
1303 // restrict the green's function to a (2N)^3 grid from the (2N+1)^3 grid
1304 Kokkos::parallel_for(
1305 "Restrict domain of Green's function from 2N+1 to 2N",
1306 mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
1307 view.extent(1) - nghost - size[1],
1308 view.extent(2) - nghost - size[2]}),
1309 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1310 // go from local indices to global
1311 const int ig = i + ldom[0].first() - nghost;
1312 const int jg = j + ldom[1].first() - nghost;
1313 const int kg = k + ldom[2].first() - nghost;
1314
1315 const int ig2 = i + ldom_g2n1[0].first() - nghost_g2n1;
1316 const int jg2 = j + ldom_g2n1[1].first() - nghost_g2n1;
1317 const int kg2 = k + ldom_g2n1[2].first() - nghost_g2n1;
1318
1319 const bool isOrig = ((ig == ig2) && (jg == jg2) && (kg == kg2));
1320 view(i, j, k) = isOrig * view_g2n1(i, j, k);
1321
1322 // Now fill the rest of the field
1323 const int s = 2 * size[0] - ig - 1 - ldom_g2n1[0].first() + nghost_g2n1;
1324 const int p = 2 * size[1] - jg - 1 - ldom_g2n1[1].first() + nghost_g2n1;
1325 const int q = 2 * size[2] - kg - 1 - ldom_g2n1[2].first() + nghost_g2n1;
1326
1327 view(s, j, k) = view_g2n1(i + 1, j, k);
1328 view(i, p, k) = view_g2n1(i, j + 1, k);
1329 view(i, j, q) = view_g2n1(i, j, k + 1);
1330 view(s, j, q) = view_g2n1(i + 1, j, k + 1);
1331 view(s, p, k) = view_g2n1(i + 1, j + 1, k);
1332 view(i, p, q) = view_g2n1(i, j + 1, k + 1);
1333 view(s, p, q) = view_g2n1(i + 1, j + 1, k + 1);
1334 });
1335 }
1336
1337 } else {
1338 // Hockney case
1339
1340 // calculate square of the mesh spacing for each dimension
1341 Vector_t hrsq(hr_m * hr_m);
1342
1343 // use the grnIField_m helper field to compute Green's function
1344 for (unsigned int i = 0; i < Dim; ++i) {
1345 grn_mr = grn_mr + grnIField_m[i] * hrsq[i];
1346 }
1347
1348 grn_mr = -1.0 / (4.0 * pi * sqrt(grn_mr));
1349
1350 typename Field_t::view_type view = grn_mr.getView();
1351 const int nghost = grn_mr.getNghost();
1352 const auto& ldom = layout2_m->getLocalNDIndex();
1353
1354 // Kokkos parallel for loop to find (0,0,0) point and regularize
1355 Kokkos::parallel_for(
1356 "Regularize Green's function ", grn_mr.getFieldRangePolicy(),
1357 KOKKOS_LAMBDA(const int i, const int j, const int k) {
1358 // go from local indices to global
1359 const int ig = i + ldom[0].first() - nghost;
1360 const int jg = j + ldom[1].first() - nghost;
1361 const int kg = k + ldom[2].first() - nghost;
1362
1363 // if (0,0,0), assign to it 1/(4*pi)
1364 const bool isOrig = (ig == 0 && jg == 0 && kg == 0);
1365 view(i, j, k) = isOrig * (-1.0 / (4.0 * pi)) + (!isOrig) * view(i, j, k);
1366 });
1367 }
1368
1369 // start a timer
1370 static IpplTimings::TimerRef fftg = IpplTimings::getTimer("FFT: Green");
1372
1373 // perform the FFT of the Green's function for the convolution
1374 fft_m->transform(FORWARD, grn_mr, grntr_m);
1375
1377 };
1378
1379 template <typename FieldLHS, typename FieldRHS>
1381 Vector<int, Dim> size, typename CxField_gt::view_type view_g,
1382 const ippl::NDIndex<Dim> ldom_g, const int nghost_g, typename Field_t::view_type view,
1383 const ippl::NDIndex<Dim> ldom, const int nghost) {
1384 const auto& lDomains2 = layout2_m->getHostLocalDomains();
1385 const auto& lDomains4 = layout4_m->getHostLocalDomains();
1386
1387 std::vector<MPI_Request> requests(0);
1388 const int ranks = Comm->size();
1389
1390 // 1st step: Define 8 domains corresponding to the different quadrants
1391 ippl::NDIndex<Dim> none;
1392 for (unsigned i = 0; i < Dim; i++) {
1393 none[i] = ippl::Index(size[i]);
1394 }
1395
1397 x[0] = ippl::Index(size[0], 2 * size[0] - 1);
1398 x[1] = ippl::Index(size[1]);
1399 x[2] = ippl::Index(size[2]);
1400
1402 y[0] = ippl::Index(size[0]);
1403 y[1] = ippl::Index(size[1], 2 * size[1] - 1);
1404 y[2] = ippl::Index(size[2]);
1405
1407 z[0] = ippl::Index(size[0]);
1408 z[1] = ippl::Index(size[1]);
1409 z[2] = ippl::Index(size[2], 2 * size[2] - 1);
1410
1412 xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
1413 xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
1414 xy[2] = ippl::Index(size[2]);
1415
1417 xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
1418 xz[1] = ippl::Index(size[1]);
1419 xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1420
1422 yz[0] = ippl::Index(size[0]);
1423 yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
1424 yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1425
1427 for (unsigned i = 0; i < Dim; i++) {
1428 xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
1429 }
1430
1431 // 2nd step: send
1432 for (int i = 0; i < ranks; ++i) {
1433 auto domain2 = lDomains2[i];
1434
1435 if (domain2.touches(none)) {
1436 auto intersection = domain2.intersect(none);
1437
1438 if (ldom_g.touches(intersection)) {
1439 intersection = intersection.intersect(ldom_g);
1440
1441 solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
1442 fd_m, requests);
1443 }
1444 }
1445
1446 if (domain2.touches(x)) {
1447 auto intersection = domain2.intersect(x);
1448 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1449 (2 * size[0] - intersection[0].last()), -1);
1450
1451 ippl::NDIndex<Dim> domain4;
1452 domain4[0] = xdom;
1453 domain4[1] = intersection[1];
1454 domain4[2] = intersection[2];
1455
1456 if (ldom_g.touches(domain4)) {
1457 intersection = ldom_g.intersect(domain4);
1458
1459 solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
1460 fd_m, requests);
1461 }
1462 }
1463
1464 if (domain2.touches(y)) {
1465 auto intersection = domain2.intersect(y);
1466 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1467 (2 * size[1] - intersection[1].last()), -1);
1468
1469 ippl::NDIndex<Dim> domain4;
1470 domain4[0] = intersection[0];
1471 domain4[1] = ydom;
1472 domain4[2] = intersection[2];
1473
1474 if (ldom_g.touches(domain4)) {
1475 intersection = ldom_g.intersect(domain4);
1476
1477 solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
1478 fd_m, requests);
1479 }
1480 }
1481
1482 if (domain2.touches(z)) {
1483 auto intersection = domain2.intersect(z);
1484 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1485 (2 * size[2] - intersection[2].last()), -1);
1486
1487 ippl::NDIndex<Dim> domain4;
1488 domain4[0] = intersection[0];
1489 domain4[1] = intersection[1];
1490 domain4[2] = zdom;
1491
1492 if (ldom_g.touches(domain4)) {
1493 intersection = ldom_g.intersect(domain4);
1494
1495 solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
1496 fd_m, requests);
1497 }
1498 }
1499
1500 if (domain2.touches(xy)) {
1501 auto intersection = domain2.intersect(xy);
1502 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1503 (2 * size[0] - intersection[0].last()), -1);
1504 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1505 (2 * size[1] - intersection[1].last()), -1);
1506
1507 ippl::NDIndex<Dim> domain4;
1508 domain4[0] = xdom;
1509 domain4[1] = ydom;
1510 domain4[2] = intersection[2];
1511
1512 if (ldom_g.touches(domain4)) {
1513 intersection = ldom_g.intersect(domain4);
1514
1515 solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
1516 fd_m, requests);
1517 }
1518 }
1519
1520 if (domain2.touches(yz)) {
1521 auto intersection = domain2.intersect(yz);
1522 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1523 (2 * size[1] - intersection[1].last()), -1);
1524 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1525 (2 * size[2] - intersection[2].last()), -1);
1526
1527 ippl::NDIndex<Dim> domain4;
1528 domain4[0] = intersection[0];
1529 domain4[1] = ydom;
1530 domain4[2] = zdom;
1531
1532 if (ldom_g.touches(domain4)) {
1533 intersection = ldom_g.intersect(domain4);
1534 solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
1535 fd_m, requests);
1536 }
1537 }
1538
1539 if (domain2.touches(xz)) {
1540 auto intersection = domain2.intersect(xz);
1541 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1542 (2 * size[0] - intersection[0].last()), -1);
1543 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1544 (2 * size[2] - intersection[2].last()), -1);
1545
1546 ippl::NDIndex<Dim> domain4;
1547 domain4[0] = xdom;
1548 domain4[1] = intersection[1];
1549 domain4[2] = zdom;
1550
1551 if (ldom_g.touches(domain4)) {
1552 intersection = ldom_g.intersect(domain4);
1553 solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
1554 fd_m, requests);
1555 }
1556 }
1557
1558 if (domain2.touches(xyz)) {
1559 auto intersection = domain2.intersect(xyz);
1560 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1561 (2 * size[0] - intersection[0].last()), -1);
1562 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1563 (2 * size[1] - intersection[1].last()), -1);
1564 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1565 (2 * size[2] - intersection[2].last()), -1);
1566
1567 ippl::NDIndex<Dim> domain4;
1568 domain4[0] = xdom;
1569 domain4[1] = ydom;
1570 domain4[2] = zdom;
1571
1572 if (ldom_g.touches(domain4)) {
1573 intersection = ldom_g.intersect(domain4);
1574 solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
1575 fd_m, requests);
1576 }
1577 }
1578 }
1579
1580 // 3rd step: receive
1581 for (int i = 0; i < ranks; ++i) {
1582 if (ldom.touches(none)) {
1583 auto intersection = ldom.intersect(none);
1584
1585 if (lDomains4[i].touches(intersection)) {
1586 intersection = intersection.intersect(lDomains4[i]);
1587
1588 solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
1589 fd_m);
1590 }
1591 }
1592
1593 if (ldom.touches(x)) {
1594 auto intersection = ldom.intersect(x);
1595
1596 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1597 (2 * size[0] - intersection[0].last()), -1);
1598
1599 ippl::NDIndex<Dim> domain4;
1600 domain4[0] = xdom;
1601 domain4[1] = intersection[1];
1602 domain4[2] = intersection[2];
1603
1604 if (lDomains4[i].touches(domain4)) {
1605 domain4 = lDomains4[i].intersect(domain4);
1606 domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1607 2 * size[0] - domain4[0].last(), -1);
1608
1609 intersection = intersection.intersect(domain4);
1610
1611 solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
1612 true, false, false);
1613 }
1614 }
1615
1616 if (ldom.touches(y)) {
1617 auto intersection = ldom.intersect(y);
1618
1619 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1620 (2 * size[1] - intersection[1].last()), -1);
1621
1622 ippl::NDIndex<Dim> domain4;
1623 domain4[0] = intersection[0];
1624 domain4[1] = ydom;
1625 domain4[2] = intersection[2];
1626
1627 if (lDomains4[i].touches(domain4)) {
1628 domain4 = lDomains4[i].intersect(domain4);
1629 domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1630 2 * size[1] - domain4[1].last(), -1);
1631
1632 intersection = intersection.intersect(domain4);
1633
1634 solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
1635 false, true, false);
1636 }
1637 }
1638
1639 if (ldom.touches(z)) {
1640 auto intersection = ldom.intersect(z);
1641
1642 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1643 (2 * size[2] - intersection[2].last()), -1);
1644
1645 ippl::NDIndex<Dim> domain4;
1646 domain4[0] = intersection[0];
1647 domain4[1] = intersection[1];
1648 domain4[2] = zdom;
1649
1650 if (lDomains4[i].touches(domain4)) {
1651 domain4 = lDomains4[i].intersect(domain4);
1652 domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1653 2 * size[2] - domain4[2].last(), -1);
1654
1655 intersection = intersection.intersect(domain4);
1656
1657 solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
1658 false, false, true);
1659 }
1660 }
1661
1662 if (ldom.touches(xy)) {
1663 auto intersection = ldom.intersect(xy);
1664
1665 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1666 (2 * size[0] - intersection[0].last()), -1);
1667 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1668 (2 * size[1] - intersection[1].last()), -1);
1669
1670 ippl::NDIndex<Dim> domain4;
1671 domain4[0] = xdom;
1672 domain4[1] = ydom;
1673 domain4[2] = intersection[2];
1674
1675 if (lDomains4[i].touches(domain4)) {
1676 domain4 = lDomains4[i].intersect(domain4);
1677 domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1678 2 * size[0] - domain4[0].last(), -1);
1679 domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1680 2 * size[1] - domain4[1].last(), -1);
1681
1682 intersection = intersection.intersect(domain4);
1683
1684 solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
1685 true, true, false);
1686 }
1687 }
1688
1689 if (ldom.touches(yz)) {
1690 auto intersection = ldom.intersect(yz);
1691
1692 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1693 (2 * size[1] - intersection[1].last()), -1);
1694 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1695 (2 * size[2] - intersection[2].last()), -1);
1696
1697 ippl::NDIndex<Dim> domain4;
1698 domain4[0] = intersection[0];
1699 domain4[1] = ydom;
1700 domain4[2] = zdom;
1701
1702 if (lDomains4[i].touches(domain4)) {
1703 domain4 = lDomains4[i].intersect(domain4);
1704 domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1705 2 * size[1] - domain4[1].last(), -1);
1706 domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1707 2 * size[2] - domain4[2].last(), -1);
1708
1709 intersection = intersection.intersect(domain4);
1710
1711 solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
1712 false, true, true);
1713 }
1714 }
1715
1716 if (ldom.touches(xz)) {
1717 auto intersection = ldom.intersect(xz);
1718
1719 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1720 (2 * size[0] - intersection[0].last()), -1);
1721 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1722 (2 * size[2] - intersection[2].last()), -1);
1723
1724 ippl::NDIndex<Dim> domain4;
1725 domain4[0] = xdom;
1726 domain4[1] = intersection[1];
1727 domain4[2] = zdom;
1728
1729 if (lDomains4[i].touches(domain4)) {
1730 domain4 = lDomains4[i].intersect(domain4);
1731 domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1732 2 * size[0] - domain4[0].last(), -1);
1733 domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1734 2 * size[2] - domain4[2].last(), -1);
1735
1736 intersection = intersection.intersect(domain4);
1737
1738 solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
1739 true, false, true);
1740 }
1741 }
1742
1743 if (ldom.touches(xyz)) {
1744 auto intersection = ldom.intersect(xyz);
1745
1746 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1747 (2 * size[0] - intersection[0].last()), -1);
1748 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1749 (2 * size[1] - intersection[1].last()), -1);
1750 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1751 (2 * size[2] - intersection[2].last()), -1);
1752
1753 ippl::NDIndex<Dim> domain4;
1754 domain4[0] = xdom;
1755 domain4[1] = ydom;
1756 domain4[2] = zdom;
1757
1758 if (lDomains4[i].touches(domain4)) {
1759 domain4 = lDomains4[i].intersect(domain4);
1760 domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1761 2 * size[0] - domain4[0].last(), -1);
1762 domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1763 2 * size[1] - domain4[1].last(), -1);
1764 domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1765 2 * size[2] - domain4[2].last(), -1);
1766
1767 intersection = intersection.intersect(domain4);
1768
1769 solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
1770 true, true, true);
1771 }
1772 }
1773 }
1774
1775 if (requests.size() > 0) {
1776 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
1777 }
1778 ippl::Comm->freeAllBuffers();
1779 };
1780
1781 // CommunicateVico for DCT_VICO (2N+1 to 2N)
1782 template <typename FieldLHS, typename FieldRHS>
1784 Vector<int, Dim> size, typename Field_t::view_type view_g, const ippl::NDIndex<Dim> ldom_g,
1785 const int nghost_g, typename Field_t::view_type view, const ippl::NDIndex<Dim> ldom,
1786 const int nghost) {
1787 const auto& lDomains2 = layout2_m->getHostLocalDomains();
1788 const auto& lDomains2n1 = layout2n1_m->getHostLocalDomains();
1789
1790 std::vector<MPI_Request> requests(0);
1791 const int ranks = ippl::Comm->size();
1792
1793 // 1st step: Define 8 domains corresponding to the different quadrants
1794 ippl::NDIndex<Dim> none;
1795 for (unsigned i = 0; i < Dim; i++) {
1796 none[i] = ippl::Index(size[i]);
1797 }
1798
1800 x[0] = ippl::Index(size[0], 2 * size[0] - 1);
1801 x[1] = ippl::Index(size[1]);
1802 x[2] = ippl::Index(size[2]);
1803
1805 y[0] = ippl::Index(size[0]);
1806 y[1] = ippl::Index(size[1], 2 * size[1] - 1);
1807 y[2] = ippl::Index(size[2]);
1808
1810 z[0] = ippl::Index(size[0]);
1811 z[1] = ippl::Index(size[1]);
1812 z[2] = ippl::Index(size[2], 2 * size[2] - 1);
1813
1815 xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
1816 xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
1817 xy[2] = ippl::Index(size[2]);
1818
1820 xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
1821 xz[1] = ippl::Index(size[1]);
1822 xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1823
1825 yz[0] = ippl::Index(size[0]);
1826 yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
1827 yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1828
1830 for (unsigned i = 0; i < Dim; i++) {
1831 xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
1832 }
1833
1834 // 2nd step: send
1835 for (int i = 0; i < ranks; ++i) {
1836 auto domain2 = lDomains2[i];
1837
1838 if (domain2.touches(none)) {
1839 auto intersection = domain2.intersect(none);
1840
1841 if (ldom_g.touches(intersection)) {
1842 intersection = intersection.intersect(ldom_g);
1843
1844 solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
1845 fd_m, requests);
1846 }
1847 }
1848
1849 if (domain2.touches(x)) {
1850 auto intersection = domain2.intersect(x);
1851 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1852 (2 * size[0] - intersection[0].last()), -1);
1853
1854 ippl::NDIndex<Dim> domain2n1;
1855 domain2n1[0] = xdom;
1856 domain2n1[1] = intersection[1];
1857 domain2n1[2] = intersection[2];
1858
1859 if (ldom_g.touches(domain2n1)) {
1860 intersection = ldom_g.intersect(domain2n1);
1861
1862 solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
1863 fd_m, requests);
1864 }
1865 }
1866
1867 if (domain2.touches(y)) {
1868 auto intersection = domain2.intersect(y);
1869 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1870 (2 * size[1] - intersection[1].last()), -1);
1871
1872 ippl::NDIndex<Dim> domain2n1;
1873 domain2n1[0] = intersection[0];
1874 domain2n1[1] = ydom;
1875 domain2n1[2] = intersection[2];
1876
1877 if (ldom_g.touches(domain2n1)) {
1878 intersection = ldom_g.intersect(domain2n1);
1879
1880 solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
1881 fd_m, requests);
1882 }
1883 }
1884
1885 if (domain2.touches(z)) {
1886 auto intersection = domain2.intersect(z);
1887 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1888 (2 * size[2] - intersection[2].last()), -1);
1889
1890 ippl::NDIndex<Dim> domain2n1;
1891 domain2n1[0] = intersection[0];
1892 domain2n1[1] = intersection[1];
1893 domain2n1[2] = zdom;
1894
1895 if (ldom_g.touches(domain2n1)) {
1896 intersection = ldom_g.intersect(domain2n1);
1897
1898 solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
1899 fd_m, requests);
1900 }
1901 }
1902
1903 if (domain2.touches(xy)) {
1904 auto intersection = domain2.intersect(xy);
1905 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1906 (2 * size[0] - intersection[0].last()), -1);
1907 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1908 (2 * size[1] - intersection[1].last()), -1);
1909
1910 ippl::NDIndex<Dim> domain2n1;
1911 domain2n1[0] = xdom;
1912 domain2n1[1] = ydom;
1913 domain2n1[2] = intersection[2];
1914
1915 if (ldom_g.touches(domain2n1)) {
1916 intersection = ldom_g.intersect(domain2n1);
1917
1918 solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
1919 fd_m, requests);
1920 }
1921 }
1922
1923 if (domain2.touches(yz)) {
1924 auto intersection = domain2.intersect(yz);
1925 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1926 (2 * size[1] - intersection[1].last()), -1);
1927 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1928 (2 * size[2] - intersection[2].last()), -1);
1929
1930 ippl::NDIndex<Dim> domain2n1;
1931 domain2n1[0] = intersection[0];
1932 domain2n1[1] = ydom;
1933 domain2n1[2] = zdom;
1934
1935 if (ldom_g.touches(domain2n1)) {
1936 intersection = ldom_g.intersect(domain2n1);
1937
1938 solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
1939 fd_m, requests);
1940 }
1941 }
1942
1943 if (domain2.touches(xz)) {
1944 auto intersection = domain2.intersect(xz);
1945 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1946 (2 * size[0] - intersection[0].last()), -1);
1947 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1948 (2 * size[2] - intersection[2].last()), -1);
1949
1950 ippl::NDIndex<Dim> domain2n1;
1951 domain2n1[0] = xdom;
1952 domain2n1[1] = intersection[1];
1953 domain2n1[2] = zdom;
1954
1955 if (ldom_g.touches(domain2n1)) {
1956 intersection = ldom_g.intersect(domain2n1);
1957
1958 solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
1959 fd_m, requests);
1960 }
1961 }
1962
1963 if (domain2.touches(xyz)) {
1964 auto intersection = domain2.intersect(xyz);
1965 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1966 (2 * size[0] - intersection[0].last()), -1);
1967 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1968 (2 * size[1] - intersection[1].last()), -1);
1969 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1970 (2 * size[2] - intersection[2].last()), -1);
1971
1972 ippl::NDIndex<Dim> domain2n1;
1973 domain2n1[0] = xdom;
1974 domain2n1[1] = ydom;
1975 domain2n1[2] = zdom;
1976
1977 if (ldom_g.touches(domain2n1)) {
1978 intersection = ldom_g.intersect(domain2n1);
1979
1980 solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
1981 fd_m, requests);
1982 }
1983 }
1984 }
1985
1986 // 3rd step: receive
1987 for (int i = 0; i < ranks; ++i) {
1988 if (ldom.touches(none)) {
1989 auto intersection = ldom.intersect(none);
1990
1991 if (lDomains2n1[i].touches(intersection)) {
1992 intersection = intersection.intersect(lDomains2n1[i]);
1993
1994 solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
1995 fd_m);
1996 }
1997 }
1998
1999 if (ldom.touches(x)) {
2000 auto intersection = ldom.intersect(x);
2001
2002 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2003 (2 * size[0] - intersection[0].last()), -1);
2004
2005 ippl::NDIndex<Dim> domain2n1;
2006 domain2n1[0] = xdom;
2007 domain2n1[1] = intersection[1];
2008 domain2n1[2] = intersection[2];
2009
2010 if (lDomains2n1[i].touches(domain2n1)) {
2011 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2012 domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2013 2 * size[0] - domain2n1[0].last(), -1);
2014
2015 intersection = intersection.intersect(domain2n1);
2016
2017 solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
2018 true, false, false);
2019 }
2020 }
2021
2022 if (ldom.touches(y)) {
2023 auto intersection = ldom.intersect(y);
2024
2025 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2026 (2 * size[1] - intersection[1].last()), -1);
2027
2028 ippl::NDIndex<Dim> domain2n1;
2029 domain2n1[0] = intersection[0];
2030 domain2n1[1] = ydom;
2031 domain2n1[2] = intersection[2];
2032
2033 if (lDomains2n1[i].touches(domain2n1)) {
2034 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2035 domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2036 2 * size[1] - domain2n1[1].last(), -1);
2037
2038 intersection = intersection.intersect(domain2n1);
2039
2040 solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
2041 false, true, false);
2042 }
2043 }
2044
2045 if (ldom.touches(z)) {
2046 auto intersection = ldom.intersect(z);
2047
2048 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2049 (2 * size[2] - intersection[2].last()), -1);
2050
2051 ippl::NDIndex<Dim> domain2n1;
2052 domain2n1[0] = intersection[0];
2053 domain2n1[1] = intersection[1];
2054 domain2n1[2] = zdom;
2055
2056 if (lDomains2n1[i].touches(domain2n1)) {
2057 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2058 domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2059 2 * size[2] - domain2n1[2].last(), -1);
2060
2061 intersection = intersection.intersect(domain2n1);
2062
2063 solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
2064 false, false, true);
2065 }
2066 }
2067
2068 if (ldom.touches(xy)) {
2069 auto intersection = ldom.intersect(xy);
2070
2071 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2072 (2 * size[0] - intersection[0].last()), -1);
2073 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2074 (2 * size[1] - intersection[1].last()), -1);
2075
2076 ippl::NDIndex<Dim> domain2n1;
2077 domain2n1[0] = xdom;
2078 domain2n1[1] = ydom;
2079 domain2n1[2] = intersection[2];
2080
2081 if (lDomains2n1[i].touches(domain2n1)) {
2082 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2083 domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2084 2 * size[0] - domain2n1[0].last(), -1);
2085 domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2086 2 * size[1] - domain2n1[1].last(), -1);
2087
2088 intersection = intersection.intersect(domain2n1);
2089
2090 solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
2091 true, true, false);
2092 }
2093 }
2094
2095 if (ldom.touches(yz)) {
2096 auto intersection = ldom.intersect(yz);
2097
2098 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2099 (2 * size[1] - intersection[1].last()), -1);
2100 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2101 (2 * size[2] - intersection[2].last()), -1);
2102
2103 ippl::NDIndex<Dim> domain2n1;
2104 domain2n1[0] = intersection[0];
2105 domain2n1[1] = ydom;
2106 domain2n1[2] = zdom;
2107
2108 if (lDomains2n1[i].touches(domain2n1)) {
2109 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2110 domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2111 2 * size[1] - domain2n1[1].last(), -1);
2112 domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2113 2 * size[2] - domain2n1[2].last(), -1);
2114
2115 intersection = intersection.intersect(domain2n1);
2116
2117 solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
2118 false, true, true);
2119 }
2120 }
2121
2122 if (ldom.touches(xz)) {
2123 auto intersection = ldom.intersect(xz);
2124
2125 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2126 (2 * size[0] - intersection[0].last()), -1);
2127 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2128 (2 * size[2] - intersection[2].last()), -1);
2129
2130 ippl::NDIndex<Dim> domain2n1;
2131 domain2n1[0] = xdom;
2132 domain2n1[1] = intersection[1];
2133 domain2n1[2] = zdom;
2134
2135 if (lDomains2n1[i].touches(domain2n1)) {
2136 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2137 domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2138 2 * size[0] - domain2n1[0].last(), -1);
2139 domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2140 2 * size[2] - domain2n1[2].last(), -1);
2141
2142 intersection = intersection.intersect(domain2n1);
2143
2144 solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
2145 true, false, true);
2146 }
2147 }
2148
2149 if (ldom.touches(xyz)) {
2150 auto intersection = ldom.intersect(xyz);
2151
2152 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2153 (2 * size[0] - intersection[0].last()), -1);
2154 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2155 (2 * size[1] - intersection[1].last()), -1);
2156 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2157 (2 * size[2] - intersection[2].last()), -1);
2158
2159 ippl::NDIndex<Dim> domain2n1;
2160 domain2n1[0] = xdom;
2161 domain2n1[1] = ydom;
2162 domain2n1[2] = zdom;
2163
2164 if (lDomains2n1[i].touches(domain2n1)) {
2165 domain2n1 = lDomains2n1[i].intersect(domain2n1);
2166 domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2167 2 * size[0] - domain2n1[0].last(), -1);
2168 domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2169 2 * size[1] - domain2n1[1].last(), -1);
2170 domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2171 2 * size[2] - domain2n1[2].last(), -1);
2172
2173 intersection = intersection.intersect(domain2n1);
2174
2175 solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
2176 true, true, true);
2177 }
2178 }
2179 }
2180
2181 if (requests.size() > 0) {
2182 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
2183 }
2184 ippl::Comm->freeAllBuffers();
2185 };
2186} // namespace ippl
const double pi
ippl::FieldLayout< Dim > FieldLayout_t
Definition datatypes.h:21
void solver_recv(int TAG, int id, int i, const ippl::NDIndex< Dim > intersection, const ippl::NDIndex< Dim > ldom, int nghost, Kokkos::View< Tf *** > &view, ippl::detail::FieldBufferData< Tb > &fd, bool x=false, bool y=false, bool z=false)
void unpack_impl(const ippl::NDIndex< 3 > intersect, const Kokkos::View< Tf *** > &view, ippl::detail::FieldBufferData< Tb > &fd, int nghost, const ippl::NDIndex< 3 > ldom, size_t dim1=0, size_t dim2=0, bool x=false, bool y=false, bool z=false)
void solver_send(int TAG, int id, int i, const ippl::NDIndex< Dim > intersection, const ippl::NDIndex< Dim > ldom, int nghost, Kokkos::View< Tf *** > &view, ippl::detail::FieldBufferData< Tb > &fd, std::vector< MPI_Request > &requests)
void unpack(const ippl::NDIndex< 3 > intersect, const Kokkos::View< Tf *** > &view, ippl::detail::FieldBufferData< Tb > &fd, int nghost, const ippl::NDIndex< 3 > ldom, bool x=false, bool y=false, bool z=false)
void pack(const ippl::NDIndex< 3 > intersect, Kokkos::View< Tf *** > &view, ippl::detail::FieldBufferData< Tb > &fd, int nghost, const ippl::NDIndex< 3 > ldom, ippl::mpi::Communicator::size_type &nsends)
constexpr KOKKOS_INLINE_FUNCTION auto first()
Definition AbsorbingBC.h:10
Definition Archive.h:20
void initialize(int &argc, char *argv[], MPI_Comm comm)
Definition Ippl.cpp:16
detail::meta_hess< Field > hess(Field &u)
@ FORWARD
Definition FFT.h:63
@ BACKWARD
Definition FFT.h:64
detail::meta_grad< Field > grad(Field &u)
std::unique_ptr< mpi::Communicator > Comm
Definition Ippl.h:22
KOKKOS_INLINE_FUNCTION auto & get(Tuple< Ts... > &t)
Accessor function to get an element mutable reference at a specific index from a Tuple.
Definition Tuple.h:314
@ OPEN_SOLVER
Definition Tags.h:40
@ VICO_SOLVER
Definition Tags.h:41
std::shared_ptr< archive_type< MemorySpace > > buffer_type
detail::size_type size_type
KOKKOS_INLINE_FUNCTION bool touches(const NDIndex< Dim > &) const
Definition NDIndex.hpp:95
KOKKOS_INLINE_FUNCTION Vector< int, Dim > last() const
Definition NDIndex.hpp:178
KOKKOS_INLINE_FUNCTION unsigned size() const noexcept
Definition NDIndex.hpp:43
KOKKOS_INLINE_FUNCTION Vector< int, Dim > first() const
Definition NDIndex.hpp:170
KOKKOS_INLINE_FUNCTION NDIndex< Dim > intersect(const NDIndex< Dim > &) const
Definition NDIndex.hpp:70
KOKKOS_INLINE_FUNCTION Vector< size_t, Dim > length() const
Definition NDIndex.hpp:162
typename FieldLHS::value_type::value_type Tg
std::unique_ptr< mesh_type > mesh2n1_m
typename mesh_type::vector_type vector_type
typename FieldLHS::memory_space memory_space
std::unique_ptr< FieldLayout_t > layout2_m
typename FieldRHS::value_type Trhs
std::unique_ptr< FieldLayout_t > layout4_m
std::unique_ptr< mesh_type > mesh2_m
virtual void setDefaultParameters() override
std::unique_ptr< FFT< CCTransform, CxField_gt > > fft4n_m
std::unique_ptr< mesh_type > meshComplex_m
Poisson< FieldLHS, FieldRHS > Base
std::unique_ptr< FFT< Cos1Transform, Field_t > > fft2n1_m
std::unique_ptr< FieldLayout_t > layout2n1_m
std::unique_ptr< FFT_t > fft_m
static constexpr unsigned Dim
typename FieldLHS::Mesh_t mesh_type
detail::FieldBufferData< Trhs > fd_m
void setRhs(rhs_type &rhs) override
std::unique_ptr< FieldLayout_t > layoutComplex_m
std::unique_ptr< mesh_type > mesh4_m
mpi::Communicator::buffer_type< memory_space > buffer_type
typename mesh_type::value_type scalar_type
void communicateVico(Vector< int, Dim > size, typename CxField_gt::view_type view_g, const ippl::NDIndex< Dim > ldom_g, const int nghost_g, typename Field_t::view_type view, const ippl::NDIndex< Dim > ldom, const int nghost)
lhs_type * lhs_mp
Definition Poisson.h:123
ParameterList params_m
Definition Poisson.h:120
void setLhs(lhs_type &lhs)
Definition Poisson.h:90
virtual void setRhs(rhs_type &rhs)
Definition Poisson.h:96
rhs_type * rhs_mp
Definition Poisson.h:122
Timing::TimerRef TimerRef
static TimerRef getTimer(const char *nm)
static void stopTimer(TimerRef t)
static void startTimer(TimerRef t)