Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst, &
42 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
52 use space, only : space_t
53 use coefs, only : coef_t
54 use field, only : field_t
55 use field_list, only : field_list_t
56 use field_math, only : field_rzero
58 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
65 use vector, only : vector_t
67 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use, intrinsic :: iso_c_binding, only : c_ptr
69 implicit none
70 private
71
74
75 interface rotate_cyc
76 module procedure rotate_cyc_r1
77 module procedure rotate_cyc_r4
78 end interface rotate_cyc
79
80contains
81
89 subroutine dudxyz (du, u, dr, ds, dt, coef)
90 type(coef_t), intent(in), target :: coef
91 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
92 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
93
94 if (neko_bcknd_sx .eq. 1) then
95 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
96 else if (neko_bcknd_xsmm .eq. 1) then
97 call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
98 else if (neko_bcknd_device .eq. 1) then
99 call opr_device_dudxyz(du, u, dr, ds, dt, coef)
100 else
101 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
102 end if
103
104 end subroutine dudxyz
105
112 subroutine div(res, ux, uy, uz, coef)
113 type(coef_t), intent(in), target :: coef
114 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: res
115 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: ux, uy, uz
116 type(field_t), pointer :: work
117 integer :: ind
118 type(c_ptr) :: res_d
119
120 if (neko_bcknd_device .eq. 1) then
121 res_d = device_get_ptr(res)
122 end if
123
124 call neko_scratch_registry%request_field(work, ind, .false.)
125
126 ! Get dux / dx
127 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
128
129 ! Get duy / dy
130 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
131 if (neko_bcknd_device .eq. 1) then
132 call device_add2(res_d, work%x_d, work%size())
133 else
134 call add2(res, work%x, work%size())
135 end if
136
137 ! Get dux / dz
138 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
139 if (neko_bcknd_device .eq. 1) then
140 call device_add2(res_d, work%x_d, work%size())
141 else
142 call add2(res, work%x, work%size())
143 end if
144
145 call neko_scratch_registry%relinquish_field(ind)
146
147 end subroutine div
148
155 subroutine grad(ux, uy, uz, u, coef)
156 type(coef_t), intent(in) :: coef
157 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
158 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
159 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
160 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
161
162 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
163 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
164 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
165
166 end subroutine grad
167
180 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
181 type(coef_t), intent(in) :: coef
182 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
183 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
184 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
185 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
186 integer, optional :: es, ee
187 integer :: eblk_start, eblk_end
188
189 if (present(es)) then
190 eblk_start = es
191 else
192 eblk_start = 1
193 end if
194
195 if (present(ee)) then
196 eblk_end = ee
197 else
198 eblk_end = coef%msh%nelv
199 end if
200
201 if (neko_bcknd_sx .eq. 1) then
202 call opr_sx_opgrad(ux, uy, uz, u, coef)
203 else if (neko_bcknd_xsmm .eq. 1) then
204 call opr_xsmm_opgrad(ux, uy, uz, u, coef)
205 else if (neko_bcknd_device .eq. 1) then
206 call opr_device_opgrad(ux, uy, uz, u, coef)
207 else
208 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
209 end if
210
211 end subroutine opgrad
212
217 subroutine ortho(x, glb_n_points, n)
218 integer, intent(in) :: n
219 integer(kind=i8), intent(in) :: glb_n_points
220 real(kind=rp), dimension(n), intent(inout) :: x
221 real(kind=rp) :: c
222 type(c_ptr) :: x_d
223 if (neko_bcknd_device .eq. 1) then
224 x_d = device_get_ptr(x)
225 c = device_glsum(x_d, n)/glb_n_points
226 call device_cadd(x_d, -c, n)
227 else
228 c = glsum(x, n)/glb_n_points
229 call cadd(x, -c, n)
230 end if
231
232 end subroutine ortho
233
245 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
246 type(coef_t), intent(in) :: coef
247 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
248 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
249 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
250 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
251 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
252 integer, optional :: es, ee
253 integer :: eblk_start, eblk_end
254
255 if (present(es)) then
256 eblk_start = es
257 else
258 eblk_start = 1
259 end if
260
261 if (present(ee)) then
262 eblk_end = ee
263 else
264 eblk_end = coef%msh%nelv
265 end if
266
267 if (neko_bcknd_sx .eq. 1) then
268 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
269 else if (neko_bcknd_xsmm .eq. 1) then
270 call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
271 else if (neko_bcknd_device .eq. 1) then
272 call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
273 else
274 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
275 end if
276
277 end subroutine cdtp
278
289 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
290 type(space_t), intent(in) :: xh
291 type(coef_t), intent(in) :: coef
292 real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
293 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
294 real(kind=rp), intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
295 real(kind=rp), intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
296 real(kind=rp), intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
297 integer, optional :: es, ee
298 integer :: eblk_end, eblk_start
299
300 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
301 if (present(es)) then
302 eblk_start = es
303 else
304 eblk_start = 1
305 end if
306
307 if (present(ee)) then
308 eblk_end = ee
309 else
310 eblk_end = coef%msh%nelv
311 end if
312
313 if (neko_bcknd_sx .eq. 1) then
314 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
315 else if (neko_bcknd_xsmm .eq. 1) then
316 call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
317 else if (neko_bcknd_device .eq. 1) then
318 call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
319 else
320 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
321 end if
322 end associate
323
324 end subroutine conv1
325
343 subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
344 coef_GL, GLL_to_GL)
345 type(space_t), intent(in) :: xh_gl
346 type(space_t), intent(in) :: xh_gll
347 type(coef_t), intent(in) :: coef_GLL
348 type(coef_t), intent(in) :: coef_GL
349 type(interpolator_t), intent(inout) :: GLL_to_GL
350 real(kind=rp), intent(inout) :: &
351 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
352 real(kind=rp), intent(inout) :: &
353 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
354 type(field_t), intent(inout) :: cr, cs, ct
355 type(c_ptr) :: u_d
356
357 if (neko_bcknd_sx .eq. 1) then
358 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
359 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
360 else if (neko_bcknd_xsmm .eq. 1) then
361 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
362 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
363 else if (neko_bcknd_device .eq. 1) then
364 u_d = device_get_ptr(u)
365 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
366 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
367 else
368 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
369 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
370 end if
371
372 end subroutine convect_scalar
373
374 !! Compute the curl fo a vector field.
375 !! @param w1 Will store the x component of the curl.
376 !! @param w2 Will store the y component of the curl.
377 !! @param w3 Will store the z component of the curl.
378 !! @param u1 The x component of the vector field.
379 !! @param u2 The y component of the vector field.
380 !! @param u3 The z component of the vector field.
381 !! @param work1 A temporary array for computations.
382 !! @param work2 A temporary array for computations.
383 !! @param coef The SEM coefficients.
384 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
385 type(field_t), intent(inout) :: w1
386 type(field_t), intent(inout) :: w2
387 type(field_t), intent(inout) :: w3
388 type(field_t), intent(in) :: u1
389 type(field_t), intent(in) :: u2
390 type(field_t), intent(in) :: u3
391 type(field_t), intent(inout) :: work1
392 type(field_t), intent(inout) :: work2
393 type(coef_t), intent(in) :: coef
394 type(c_ptr), optional, intent(inout) :: event
395
396 if (neko_bcknd_sx .eq. 1) then
397 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
398 else if (neko_bcknd_xsmm .eq. 1) then
399 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
400 else if (neko_bcknd_device .eq. 1) then
401 if (present(event)) then
402 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
403 work1, work2, coef, event)
404 else
405 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
406 end if
407 else
408 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
409 end if
410
411 end subroutine curl
412
413 !! Compute the CFL number
414 !! @param dt The timestep.
415 !! @param u The x component of velocity.
416 !! @param v The y component of velocity.
417 !! @param w The z component of velocity.
418 !! @param Xh The SEM function space.
419 !! @param coef The SEM coefficients.
420 !! @param nelv The total number of elements.
421 !! @param gdim Number of geometric dimensions.
422 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
423 type(space_t), intent(in) :: xh
424 type(coef_t), intent(in) :: coef
425 integer, intent(in) :: nelv, gdim
426 real(kind=rp), intent(in) :: dt
427 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: u, v, w
428 real(kind=rp) :: cfl
429 integer :: ierr
430
431 if (neko_bcknd_sx .eq. 1) then
432 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
433 else if (neko_bcknd_device .eq. 1) then
434 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
435 else
436 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
437 end if
438
439 if (.not. neko_device_mpi) then
440 call mpi_allreduce(mpi_in_place, cfl, 1, &
441 mpi_real_precision, mpi_max, neko_comm, ierr)
442 end if
443
444 end function cfl
445
446 !! Compute the CFL number for compressible flows
447 !! @param dt The timestep.
448 !! @param max_wave_speed The precomputed maximum wave speed field.
449 !! @param Xh The SEM function space.
450 !! @param coef The SEM coefficients.
451 !! @param nelv The total number of elements.
452 !! @param gdim Number of geometric dimensions.
453 function cfl_compressible(dt, max_wave_speed, Xh, coef, nelv, gdim)
454 type(space_t), intent(in) :: xh
455 type(coef_t), intent(in) :: coef
456 integer, intent(in) :: nelv, gdim
457 real(kind=rp), intent(in) :: dt
458 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: max_wave_speed
459 real(kind=rp) :: cfl_compressible
460
461 cfl_compressible = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
462 xh, coef, nelv, gdim)
463
464 end function cfl_compressible
465
478 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
479 type(field_t), intent(in) :: u, v, w
480 type(coef_t), intent(in) :: coef
481 real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
482 real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
483 real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
484 real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
485 real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
486 real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
487
488 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
489
490 integer :: nelv, lxyz
491
492 if (neko_bcknd_device .eq. 1) then
493 s11_d = device_get_ptr(s11)
494 s22_d = device_get_ptr(s22)
495 s33_d = device_get_ptr(s33)
496 s12_d = device_get_ptr(s12)
497 s23_d = device_get_ptr(s23)
498 s13_d = device_get_ptr(s13)
499 end if
500
501 nelv = u%msh%nelv
502 lxyz = u%Xh%lxyz
503
504 ! we use s11 as a work array here
505 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
506 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
507 if (neko_bcknd_device .eq. 1) then
508 call device_add2(s12_d, s11_d, nelv*lxyz)
509 else
510 call add2(s12, s11, nelv*lxyz)
511 end if
512
513 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
514 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
515 if (neko_bcknd_device .eq. 1) then
516 call device_add2(s13_d, s11_d, nelv*lxyz)
517 else
518 call add2(s13, s11, nelv*lxyz)
519 end if
520
521 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
522 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
523 if (neko_bcknd_device .eq. 1) then
524 call device_add2(s23_d, s11_d, nelv*lxyz)
525 else
526 call add2(s23, s11, nelv*lxyz)
527 end if
528
529 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
530 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
531 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
532
533 if (neko_bcknd_device .eq. 1) then
534 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
535 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
536 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
537 else
538 call cmult(s12, 0.5_rp, nelv*lxyz)
539 call cmult(s13, 0.5_rp, nelv*lxyz)
540 call cmult(s23, 0.5_rp, nelv*lxyz)
541 end if
542
543 end subroutine strain_rate
544
551 subroutine lambda2op(lambda2, u, v, w, coef)
552 type(coef_t), intent(in) :: coef
553 type(field_t), intent(inout) :: lambda2
554 type(field_t), intent(in) :: u, v, w
556 if (neko_bcknd_sx .eq. 1) then
557 call opr_sx_lambda2(lambda2, u, v, w, coef)
558 else if (neko_bcknd_device .eq. 1) then
559 call opr_device_lambda2(lambda2, u, v, w, coef)
560 else
561 call opr_cpu_lambda2(lambda2, u, v, w, coef)
562 end if
563
564 end subroutine lambda2op
565
576 subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
577 type(space_t), intent(inout) :: xh
578 type(coef_t), intent(inout) :: coef
579 type(field_t), intent(inout) :: cr, cs, ct
580 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
581 intent(in) :: cx, cy, cz
582 type(c_ptr) :: cx_d, cy_d, cz_d
583
584 if (neko_bcknd_sx .eq. 1) then
585 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
586 else if (neko_bcknd_xsmm .eq. 1) then
587 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
588 else if (neko_bcknd_device .eq. 1) then
589 cx_d = device_get_ptr(cx)
590 cy_d = device_get_ptr(cy)
591 cz_d = device_get_ptr(cz)
592 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
593 cx_d, cy_d, cz_d, xh, coef)
594 else
595 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
596 end if
597
598 end subroutine set_convect_rst
599
615 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
616 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
617 type(space_t), intent(in) :: xh_gll
618 type(space_t), intent(inout) :: xh_gl
619 type(coef_t), intent(in) :: coef
620 type(coef_t), intent(inout) :: coef_gl
621 type(interpolator_t) :: gll_to_gl
622 real(kind=rp), intent(inout) :: tau, dtau
623 integer, intent(in) :: n, nel, n_gl
624 type(field_t), intent(inout) :: phi
625 type(field_list_t) :: conv_k1, conv_k23, conv_k4
626 real(kind=rp) :: c1, c2, c3
627 type(field_t), pointer :: u1, k1, k2, k3, k4
628 type(vector_t), pointer :: u1_gl
629 integer :: ind(6), i, e
630
631 call neko_scratch_registry%request_field(u1, ind(1), .false.)
632 call neko_scratch_registry%request_field(k1, ind(2), .false.)
633 call neko_scratch_registry%request_field(k2, ind(3), .false.)
634 call neko_scratch_registry%request_field(k3, ind(4), .false.)
635 call neko_scratch_registry%request_field(k4, ind(5), .false.)
636 call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)
637
638 c1 = 1.0_rp
639 c2 = -dtau/2.
640 c3 = -dtau
641
642 if (neko_bcknd_device .eq. 1) then
643
644 ! Stage 1:
645 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
646 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
647 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
648 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
649 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
650 call device_col2(k1%x_d, coef%B_d, n)
651
652 ! Stage 2:
653 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
654 call device_invcol2(u1%x_d, coef%B_d, n)
655 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
656 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
657 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
658 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
659 call device_col2(k2%x_d, coef%B_d, n)
660
661 ! Stage 3:
662 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
663 call device_invcol2(u1%x_d, coef%B_d, n)
664 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
665 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
666 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
667 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
668 call device_col2(k3%x_d, coef%B_d, n)
669
670 ! Stage 4:
671 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
672 call device_invcol2(u1%x_d, coef%B_d, n)
673 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
674 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
675 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
676 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
677 call device_col2(k4%x_d, coef%B_d, n)
678
679 c1 = -dtau/6.
680 c2 = -dtau/3.
681
682 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
683 c1, c2, c2, c1, n)
684
685 else
686
687 ! Stage 1:
688 call invcol3(u1%x, phi%x, coef%B, n)
689 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
690 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
691 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
692 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
693 call col2(k1%x, coef%B, n)
694
695 ! Stage 2:
696 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
697 call invcol2(u1%x, coef%B, n)
698 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
699 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
700 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
701 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
702 call col2(k2%x, coef%B, n)
703
704 ! Stage 3:
705 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
706 call invcol2(u1%x, coef%B, n)
707 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
708 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
709 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
710 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
711 call col2(k3%x, coef%B, n)
712
713 ! Stage 4:
714 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
715 call invcol2(u1%x, coef%B, n)
716 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
717 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
718 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
719 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
720 call col2(k4%x, coef%B, n)
721
722 c1 = -dtau/6.
723 c2 = -dtau/3.
724 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
725 end if
726
727 call neko_scratch_registry%relinquish_field(ind)
728
729 end subroutine runge_kutta
730
731 subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
732 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
733 integer, intent(in) :: idir
734 type(coef_t), intent(in) :: coef
735 if (coef%cyclic) then
736 if (neko_bcknd_device .eq. 1) then
737 call opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
738 else
739 call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
740 end if
741 end if
742 end subroutine rotate_cyc_r1
743
744 subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
745 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
746 integer, intent(in) :: idir
747 type(coef_t), intent(in) :: coef
748 if (coef%cyclic) then
749 if (neko_bcknd_device .eq. 1) then
750 call opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
751 else
752 call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
753 end if
754 end if
755 end subroutine rotate_cyc_r4
756
757
758end module operators
759
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:462
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:626
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:926
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:90
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:314
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:181
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:241
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:342
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:102
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Defines a vector.
Definition vector.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63