Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst
41 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
42 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
43 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
50 use space, only : space_t
51 use coefs, only : coef_t
52 use field, only : field_t
53 use field_list, only : field_list_t
54 use field_math, only : field_rzero
56 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
64 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
65 use, intrinsic :: iso_c_binding, only : c_ptr
66 implicit none
67 private
68
71
72contains
73
81 subroutine dudxyz (du, u, dr, ds, dt, coef)
82 type(coef_t), intent(in), target :: coef
83 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
84 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
85
86 if (neko_bcknd_sx .eq. 1) then
87 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
88 else if (neko_bcknd_xsmm .eq. 1) then
89 call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
90 else if (neko_bcknd_device .eq. 1) then
91 call opr_device_dudxyz(du, u, dr, ds, dt, coef)
92 else
93 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
94 end if
95
96 end subroutine dudxyz
97
104 subroutine div(res, ux, uy, uz, coef)
105 type(coef_t), intent(in), target :: coef
106 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: res
107 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: ux, uy, uz
108 type(field_t), pointer :: work
109 integer :: ind
110 type(c_ptr) :: res_d
111
112 if (neko_bcknd_device .eq. 1) then
113 res_d = device_get_ptr(res)
114 end if
115
116 call neko_scratch_registry%request_field(work, ind)
117
118 ! Get dux / dx
119 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
120
121 ! Get duy / dy
122 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
123 if (neko_bcknd_device .eq. 1) then
124 call device_add2(res_d, work%x_d, work%size())
125 else
126 call add2(res, work%x, work%size())
127 end if
128
129 ! Get dux / dz
130 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
131 if (neko_bcknd_device .eq. 1) then
132 call device_add2(res_d, work%x_d, work%size())
133 else
134 call add2(res, work%x, work%size())
135 end if
136
137 call neko_scratch_registry%relinquish_field(ind)
138
139 end subroutine div
140
147 subroutine grad(ux, uy, uz, u, coef)
148 type(coef_t), intent(in) :: coef
149 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
150 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
151 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
152 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
153
154 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
155 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
156 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
157
158 end subroutine grad
159
172 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
173 type(coef_t), intent(in) :: coef
174 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
175 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
176 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
177 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
178 integer, optional :: es, ee
179 integer :: eblk_start, eblk_end
180
181 if (present(es)) then
182 eblk_start = es
183 else
184 eblk_start = 1
185 end if
186
187 if (present(ee)) then
188 eblk_end = ee
189 else
190 eblk_end = coef%msh%nelv
191 end if
192
193 if (neko_bcknd_sx .eq. 1) then
194 call opr_sx_opgrad(ux, uy, uz, u, coef)
195 else if (neko_bcknd_xsmm .eq. 1) then
196 call opr_xsmm_opgrad(ux, uy, uz, u, coef)
197 else if (neko_bcknd_device .eq. 1) then
198 call opr_device_opgrad(ux, uy, uz, u, coef)
199 else
200 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
201 end if
202
203 end subroutine opgrad
204
209 subroutine ortho(x, glb_n_points, n)
210 integer, intent(in) :: n
211 integer(kind=i8), intent(in) :: glb_n_points
212 real(kind=rp), dimension(n), intent(inout) :: x
213 real(kind=rp) :: c
214 type(c_ptr) :: x_d
215 if (neko_bcknd_device .eq. 1) then
216 x_d = device_get_ptr(x)
217 c = device_glsum(x_d, n)/glb_n_points
218 call device_cadd(x_d, -c, n)
219 else
220 c = glsum(x, n)/glb_n_points
221 call cadd(x, -c, n)
222 end if
223
224 end subroutine ortho
225
237 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
238 type(coef_t), intent(in) :: coef
239 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
240 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
241 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
242 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
243 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
244 integer, optional :: es, ee
245 integer :: eblk_start, eblk_end
246
247 if (present(es)) then
248 eblk_start = es
249 else
250 eblk_start = 1
251 end if
252
253 if (present(ee)) then
254 eblk_end = ee
255 else
256 eblk_end = coef%msh%nelv
257 end if
258
259 if (neko_bcknd_sx .eq. 1) then
260 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
261 else if (neko_bcknd_xsmm .eq. 1) then
262 call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
263 else if (neko_bcknd_device .eq. 1) then
264 call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
265 else
266 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
267 end if
268
269 end subroutine cdtp
270
281 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
282 type(space_t), intent(in) :: xh
283 type(coef_t), intent(in) :: coef
284 real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
285 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
286 real(kind=rp), intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
287 real(kind=rp), intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
288 real(kind=rp), intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
289 integer, optional :: es, ee
290 integer :: eblk_end, eblk_start
291
292 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
293 if (present(es)) then
294 eblk_start = es
295 else
296 eblk_start = 1
297 end if
298
299 if (present(ee)) then
300 eblk_end = ee
301 else
302 eblk_end = coef%msh%nelv
303 end if
304
305 if (neko_bcknd_sx .eq. 1) then
306 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
307 else if (neko_bcknd_xsmm .eq. 1) then
308 call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
309 else if (neko_bcknd_device .eq. 1) then
310 call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
311 else
312 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
313 end if
314 end associate
315
316 end subroutine conv1
317
335 subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
336 coef_GL, GLL_to_GL)
337 type(space_t), intent(in) :: xh_gl
338 type(space_t), intent(in) :: xh_gll
339 type(coef_t), intent(in) :: coef_GLL
340 type(coef_t), intent(in) :: coef_GL
341 type(interpolator_t), intent(inout) :: GLL_to_GL
342 real(kind=rp), intent(inout) :: &
343 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
344 real(kind=rp), intent(inout) :: &
345 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
346 type(field_t), intent(inout) :: cr, cs, ct
347 type(c_ptr) :: u_d
348
349 if (neko_bcknd_sx .eq. 1) then
350 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
351 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
352 else if (neko_bcknd_xsmm .eq. 1) then
353 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
354 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
355 else if (neko_bcknd_device .eq. 1) then
356 u_d = device_get_ptr(u)
357 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
358 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
359 else
360 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
361 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
362 end if
363
364 end subroutine convect_scalar
365
366 !! Compute the curl fo a vector field.
367 !! @param w1 Will store the x component of the curl.
368 !! @param w2 Will store the y component of the curl.
369 !! @param w3 Will store the z component of the curl.
370 !! @param u1 The x component of the vector field.
371 !! @param u2 The y component of the vector field.
372 !! @param u3 The z component of the vector field.
373 !! @param work1 A temporary array for computations.
374 !! @param work2 A temporary array for computations.
375 !! @param coef The SEM coefficients.
376 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
377 type(field_t), intent(inout) :: w1
378 type(field_t), intent(inout) :: w2
379 type(field_t), intent(inout) :: w3
380 type(field_t), intent(in) :: u1
381 type(field_t), intent(in) :: u2
382 type(field_t), intent(in) :: u3
383 type(field_t), intent(inout) :: work1
384 type(field_t), intent(inout) :: work2
385 type(coef_t), intent(in) :: coef
386 type(c_ptr), optional, intent(inout) :: event
387
388 if (neko_bcknd_sx .eq. 1) then
389 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
390 else if (neko_bcknd_xsmm .eq. 1) then
391 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
392 else if (neko_bcknd_device .eq. 1) then
393 if (present(event)) then
394 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
395 work1, work2, coef, event)
396 else
397 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
398 end if
399 else
400 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
401 end if
402
403 end subroutine curl
404
405 !! Compute the CFL number
406 !! @param dt The timestep.
407 !! @param u The x component of velocity.
408 !! @param v The y component of velocity.
409 !! @param w The z component of velocity.
410 !! @param Xh The SEM function space.
411 !! @param coef The SEM coefficients.
412 !! @param nelv The total number of elements.
413 !! @param gdim Number of geometric dimensions.
414 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
415 type(space_t), intent(in) :: xh
416 type(coef_t), intent(in) :: coef
417 integer, intent(in) :: nelv, gdim
418 real(kind=rp), intent(in) :: dt
419 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: u, v, w
420 real(kind=rp) :: cfl
421 integer :: ierr
422
423 if (neko_bcknd_sx .eq. 1) then
424 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
425 else if (neko_bcknd_device .eq. 1) then
426 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
427 else
428 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
429 end if
430
431 if (.not. neko_device_mpi) then
432 call mpi_allreduce(mpi_in_place, cfl, 1, &
433 mpi_real_precision, mpi_max, neko_comm, ierr)
434 end if
435
436 end function cfl
437
438 !! Compute the CFL number for compressible flows
439 !! @param dt The timestep.
440 !! @param max_wave_speed The precomputed maximum wave speed field.
441 !! @param Xh The SEM function space.
442 !! @param coef The SEM coefficients.
443 !! @param nelv The total number of elements.
444 !! @param gdim Number of geometric dimensions.
445 function cfl_compressible(dt, max_wave_speed, Xh, coef, nelv, gdim)
446 type(space_t), intent(in) :: xh
447 type(coef_t), intent(in) :: coef
448 integer, intent(in) :: nelv, gdim
449 real(kind=rp), intent(in) :: dt
450 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: max_wave_speed
451 real(kind=rp) :: cfl_compressible
452 integer :: ierr, n
453 type(field_t), pointer :: zero_vector
454 integer :: ind
455
456 n = xh%lx * xh%ly * xh%lz * nelv
457
458 ! Request a scratch field for zero vector
459 call neko_scratch_registry%request_field(zero_vector, ind)
460
461 ! Initialize zero vector
462 call field_rzero(zero_vector)
463
464 ! Use incompressible CFL with max_wave_speed as u-component, zero v and w
465 cfl_compressible = cfl(dt, max_wave_speed, zero_vector%x, zero_vector%x, xh, coef, nelv, gdim)
466
467 ! Release the scratch field
468 call neko_scratch_registry%relinquish_field(ind)
469
470 end function cfl_compressible
471
484 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
485 type(field_t), intent(in) :: u, v, w
486 type(coef_t), intent(in) :: coef
487 real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
488 real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
489 real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
490 real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
491 real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
492 real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
493
494 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
495
496 integer :: nelv, lxyz
497
498 if (neko_bcknd_device .eq. 1) then
499 s11_d = device_get_ptr(s11)
500 s22_d = device_get_ptr(s22)
501 s33_d = device_get_ptr(s33)
502 s12_d = device_get_ptr(s12)
503 s23_d = device_get_ptr(s23)
504 s13_d = device_get_ptr(s13)
505 end if
506
507 nelv = u%msh%nelv
508 lxyz = u%Xh%lxyz
509
510 ! we use s11 as a work array here
511 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
512 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
513 if (neko_bcknd_device .eq. 1) then
514 call device_add2(s12_d, s11_d, nelv*lxyz)
515 else
516 call add2(s12, s11, nelv*lxyz)
517 end if
518
519 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
520 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
521 if (neko_bcknd_device .eq. 1) then
522 call device_add2(s13_d, s11_d, nelv*lxyz)
523 else
524 call add2(s13, s11, nelv*lxyz)
525 end if
526
527 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
528 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
529 if (neko_bcknd_device .eq. 1) then
530 call device_add2(s23_d, s11_d, nelv*lxyz)
531 else
532 call add2(s23, s11, nelv*lxyz)
533 end if
534
535 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
536 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
537 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
538
539 if (neko_bcknd_device .eq. 1) then
540 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
541 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
542 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
543 else
544 call cmult(s12, 0.5_rp, nelv*lxyz)
545 call cmult(s13, 0.5_rp, nelv*lxyz)
546 call cmult(s23, 0.5_rp, nelv*lxyz)
547 end if
548
549 end subroutine strain_rate
550
557 subroutine lambda2op(lambda2, u, v, w, coef)
558 type(coef_t), intent(in) :: coef
559 type(field_t), intent(inout) :: lambda2
560 type(field_t), intent(in) :: u, v, w
562 if (neko_bcknd_sx .eq. 1) then
563 call opr_sx_lambda2(lambda2, u, v, w, coef)
564 else if (neko_bcknd_device .eq. 1) then
565 call opr_device_lambda2(lambda2, u, v, w, coef)
566 else
567 call opr_cpu_lambda2(lambda2, u, v, w, coef)
568 end if
569
570 end subroutine lambda2op
571
582 subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
583 type(space_t), intent(inout) :: xh
584 type(coef_t), intent(inout) :: coef
585 type(field_t), intent(inout) :: cr, cs, ct
586 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
587 intent(in) :: cx, cy, cz
588 type(c_ptr) :: cx_d, cy_d, cz_d
589
590 if (neko_bcknd_sx .eq. 1) then
591 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
592 else if (neko_bcknd_xsmm .eq. 1) then
593 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
594 else if (neko_bcknd_device .eq. 1) then
595 cx_d = device_get_ptr(cx)
596 cy_d = device_get_ptr(cy)
597 cz_d = device_get_ptr(cz)
598 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
599 cx_d, cy_d, cz_d, xh, coef)
600 else
601 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
602 end if
603
604 end subroutine set_convect_rst
605
621 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
622 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
623 type(space_t), intent(in) :: xh_gll
624 type(space_t), intent(inout) :: xh_gl
625 type(coef_t), intent(in) :: coef
626 type(coef_t), intent(inout) :: coef_gl
627 type(interpolator_t) :: gll_to_gl
628 real(kind=rp), intent(inout) :: tau, dtau
629 integer, intent(in) :: n, nel, n_gl
630 type(field_t), intent(inout) :: phi
631 type(field_list_t) :: conv_k1, conv_k23, conv_k4
632 real(kind=rp) :: c1, c2, c3
633 type(field_t), pointer :: u1, k1, k2, k3, k4
634 real(kind=rp), dimension(n_GL) :: u1_gl
635 integer :: ind(5), i, e
636 type(c_ptr) :: u1_gl_d
637
638 call neko_scratch_registry%request_field(u1, ind(1))
639 call neko_scratch_registry%request_field(k1, ind(2))
640 call neko_scratch_registry%request_field(k2, ind(3))
641 call neko_scratch_registry%request_field(k3, ind(4))
642 call neko_scratch_registry%request_field(k4, ind(5))
643
644 c1 = 1.0_rp
645 c2 = -dtau/2.
646 c3 = -dtau
647
648 if (neko_bcknd_device .eq. 1) then
649 call device_map(u1_gl, u1_gl_d, n_gl)
650
651 ! Stage 1:
652 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
653 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
654 call convect_scalar(k1%x, u1_gl, conv_k1%items(1)%ptr, &
655 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
656 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
657 call device_col2(k1%x_d, coef%B_d, n)
658
659 ! Stage 2:
660 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
661 call device_invcol2(u1%x_d, coef%B_d, n)
662 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
663 call convect_scalar(k2%x, u1_gl, conv_k23%items(1)%ptr, &
664 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
665 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
666 call device_col2(k2%x_d, coef%B_d, n)
667
668 ! Stage 3:
669 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
670 call device_invcol2(u1%x_d, coef%B_d, n)
671 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
672 call convect_scalar(k3%x, u1_gl, conv_k23%items(1)%ptr, &
673 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
674 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
675 call device_col2(k3%x_d, coef%B_d, n)
676
677 ! Stage 4:
678 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
679 call device_invcol2(u1%x_d, coef%B_d, n)
680 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
681 call convect_scalar(k4%x, u1_gl, conv_k4%items(1)%ptr, &
682 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
683 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
684 call device_col2(k4%x_d, coef%B_d, n)
685
686 c1 = -dtau/6.
687 c2 = -dtau/3.
688
689 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
690 c1, c2, c2, c1, n)
691
692 call device_free(u1_gl_d)
693
694 else
695
696 ! Stage 1:
697 call invcol3(u1%x, phi%x, coef%B, n)
698 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
699 call convect_scalar(k1%x, u1_gl, conv_k1%items(1)%ptr, &
700 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
701 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
702 call col2(k1%x, coef%B, n)
703
704 ! Stage 2:
705 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
706 call invcol2(u1%x, coef%B, n)
707 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
708 call convect_scalar(k2%x, u1_gl, conv_k23%items(1)%ptr, &
709 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
710 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
711 call col2(k2%x, coef%B, n)
712
713 ! Stage 3:
714 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
715 call invcol2(u1%x, coef%B, n)
716 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
717 call convect_scalar(k3%x, u1_gl, conv_k23%items(1)%ptr, &
718 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
719 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
720 call col2(k3%x, coef%B, n)
721
722 ! Stage 4:
723 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
724 call invcol2(u1%x, coef%B, n)
725 call gll_to_gl%map(u1_gl, u1%x, nel, xh_gl)
726 call convect_scalar(k4%x, u1_gl, conv_k4%items(1)%ptr, &
727 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
728 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
729 call col2(k4%x, coef%B, n)
730
731 c1 = -dtau/6.
732 c2 = -dtau/3.
733 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
734 end if
735
736 call neko_scratch_registry%relinquish_field(ind)
737
738 end subroutine runge_kutta
739
740end module operators
741
Return the device pointer for an associated Fortran array.
Definition device.F90:96
Map a Fortran array to a device (allocate and associate)
Definition device.F90:72
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:50
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:42
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:214
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:462
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:626
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:926
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:82
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:126
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:177
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:237
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:102
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
Defines a registry for storing and requesting temporary fields This can be used when you have a funct...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:62