Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst, &
42 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
52 use space, only : space_t
53 use coefs, only : coef_t
54 use field, only : field_t
55 use field_list, only : field_list_t
56 use field_math, only : field_rzero
58 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
65 use vector, only : vector_t
67 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use, intrinsic :: iso_c_binding, only : c_ptr
69 use logger, only : neko_log
70 implicit none
71 private
72
76
77 interface rotate_cyc
78 module procedure rotate_cyc_r1
79 module procedure rotate_cyc_r4
80 end interface rotate_cyc
81
82contains
83
91 subroutine dudxyz (du, u, dr, ds, dt, coef)
92 type(coef_t), intent(in), target :: coef
93 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: du
94 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: u, dr, ds, dt
95
96 if (neko_bcknd_sx .eq. 1) then
97 call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
98 else if (neko_bcknd_xsmm .eq. 1) then
99 call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
100 else if (neko_bcknd_device .eq. 1) then
101 call opr_device_dudxyz(du, u, dr, ds, dt, coef)
102 else
103 call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
104 end if
105
106 end subroutine dudxyz
107
114 subroutine div(res, ux, uy, uz, coef)
115 type(coef_t), intent(in), target :: coef
116 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(inout) :: res
117 real(kind=rp), dimension(coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & coef%msh%nelv), intent(in) :: ux, uy, uz
118 type(field_t), pointer :: work
119 integer :: ind
120 type(c_ptr) :: res_d
121
122 if (neko_bcknd_device .eq. 1) then
123 res_d = device_get_ptr(res)
124 end if
125
126 call neko_scratch_registry%request_field(work, ind, .false.)
127
128 ! Get dux / dx
129 call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)
130
131 ! Get duy / dy
132 call dudxyz(work%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
133 if (neko_bcknd_device .eq. 1) then
134 call device_add2(res_d, work%x_d, work%size())
135 else
136 call add2(res, work%x, work%size())
137 end if
138
139 ! Get dux / dz
140 call dudxyz(work%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
141 if (neko_bcknd_device .eq. 1) then
142 call device_add2(res_d, work%x_d, work%size())
143 else
144 call add2(res, work%x, work%size())
145 end if
146
147 call neko_scratch_registry%relinquish_field(ind)
148
149 end subroutine div
150
157 subroutine grad(ux, uy, uz, u, coef)
158 type(coef_t), intent(in) :: coef
159 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
160 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
161 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
162 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
163
164 call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
165 call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
166 call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
167
168 end subroutine grad
169
182 subroutine opgrad(ux, uy, uz, u, coef, es, ee)
183 type(coef_t), intent(in) :: coef
184 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
185 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
186 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
187 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
188 integer, optional :: es, ee
189 integer :: eblk_start, eblk_end
190
191 if (present(es)) then
192 eblk_start = es
193 else
194 eblk_start = 1
195 end if
196
197 if (present(ee)) then
198 eblk_end = ee
199 else
200 eblk_end = coef%msh%nelv
201 end if
202
203 if (neko_bcknd_sx .eq. 1) then
204 call opr_sx_opgrad(ux, uy, uz, u, coef)
205 else if (neko_bcknd_xsmm .eq. 1) then
206 call opr_xsmm_opgrad(ux, uy, uz, u, coef)
207 else if (neko_bcknd_device .eq. 1) then
208 call opr_device_opgrad(ux, uy, uz, u, coef)
209 else
210 call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
211 end if
212
213 end subroutine opgrad
214
219 subroutine ortho(x, glb_n_points, n)
220 integer, intent(in) :: n
221 integer(kind=i8), intent(in) :: glb_n_points
222 real(kind=rp), dimension(n), intent(inout) :: x
223 real(kind=rp) :: c
224 type(c_ptr) :: x_d
225
226 if (neko_bcknd_device .eq. 1) then
227 call neko_log%deprecated('Operator: ortho, implicit device', &
228 'v2.0.0', 'Please call device_ortho instead.')
229
230 x_d = device_get_ptr(x)
231 c = device_glsum(x_d, n) / glb_n_points
232 call device_cadd(x_d, -c, n)
233 else
234 c = glsum(x, n) / glb_n_points
235 call cadd(x, -c, n)
236 end if
237
238 end subroutine ortho
239
251 subroutine cdtp (dtx, x, dr, ds, dt, coef, es, ee)
252 type(coef_t), intent(in) :: coef
253 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
254 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
255 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
256 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
257 real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
258 integer, optional :: es, ee
259 integer :: eblk_start, eblk_end
260
261 if (present(es)) then
262 eblk_start = es
263 else
264 eblk_start = 1
265 end if
266
267 if (present(ee)) then
268 eblk_end = ee
269 else
270 eblk_end = coef%msh%nelv
271 end if
272
273 if (neko_bcknd_sx .eq. 1) then
274 call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
275 else if (neko_bcknd_xsmm .eq. 1) then
276 call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
277 else if (neko_bcknd_device .eq. 1) then
278 call opr_device_cdtp(dtx, x, dr, ds, dt, coef)
279 else
280 call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
281 end if
282
283 end subroutine cdtp
284
295 subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
296 type(space_t), intent(in) :: xh
297 type(coef_t), intent(in) :: coef
298 real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
299 real(kind=rp), intent(inout) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
300 real(kind=rp), intent(inout) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
301 real(kind=rp), intent(inout) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
302 real(kind=rp), intent(inout) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
303 integer, optional :: es, ee
304 integer :: eblk_end, eblk_start
305
306 associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
307 if (present(es)) then
308 eblk_start = es
309 else
310 eblk_start = 1
311 end if
312
313 if (present(ee)) then
314 eblk_end = ee
315 else
316 eblk_end = coef%msh%nelv
317 end if
318
319 if (neko_bcknd_sx .eq. 1) then
320 call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
321 else if (neko_bcknd_xsmm .eq. 1) then
322 call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
323 else if (neko_bcknd_device .eq. 1) then
324 call opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
325 else
326 call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
327 end if
328 end associate
329
330 end subroutine conv1
331
349 subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
350 coef_GL, GLL_to_GL)
351 type(space_t), intent(in) :: xh_gl
352 type(space_t), intent(in) :: xh_gll
353 type(coef_t), intent(in) :: coef_GLL
354 type(coef_t), intent(in) :: coef_GL
355 type(interpolator_t), intent(inout) :: GLL_to_GL
356 real(kind=rp), intent(inout) :: &
357 du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
358 real(kind=rp), intent(inout) :: &
359 u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
360 type(field_t), intent(inout) :: cr, cs, ct
361 type(c_ptr) :: u_d
362
363 if (neko_bcknd_sx .eq. 1) then
364 call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
365 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
366 else if (neko_bcknd_xsmm .eq. 1) then
367 call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
368 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
369 else if (neko_bcknd_device .eq. 1) then
370 u_d = device_get_ptr(u)
371 call opr_device_convect_scalar(du, u_d, cr%x_d, cs%x_d, ct%x_d, &
372 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
373 else
374 call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
375 xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
376 end if
377
378 end subroutine convect_scalar
379
380 !! Compute the curl fo a vector field.
381 !! @param w1 Will store the x component of the curl.
382 !! @param w2 Will store the y component of the curl.
383 !! @param w3 Will store the z component of the curl.
384 !! @param u1 The x component of the vector field.
385 !! @param u2 The y component of the vector field.
386 !! @param u3 The z component of the vector field.
387 !! @param work1 A temporary array for computations.
388 !! @param work2 A temporary array for computations.
389 !! @param coef The SEM coefficients.
390 subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
391 type(field_t), intent(inout) :: w1
392 type(field_t), intent(inout) :: w2
393 type(field_t), intent(inout) :: w3
394 type(field_t), intent(in) :: u1
395 type(field_t), intent(in) :: u2
396 type(field_t), intent(in) :: u3
397 type(field_t), intent(inout) :: work1
398 type(field_t), intent(inout) :: work2
399 type(coef_t), intent(in) :: coef
400 type(c_ptr), optional, intent(inout) :: event
401
402 if (neko_bcknd_sx .eq. 1) then
403 call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
404 else if (neko_bcknd_xsmm .eq. 1) then
405 call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
406 else if (neko_bcknd_device .eq. 1) then
407 if (present(event)) then
408 call opr_device_curl(w1, w2, w3, u1, u2, u3, &
409 work1, work2, coef, event)
410 else
411 call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
412 end if
413 else
414 call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
415 end if
416
417 end subroutine curl
418
419 !! Compute the CFL number
420 !! @param dt The timestep.
421 !! @param u The x component of velocity.
422 !! @param v The y component of velocity.
423 !! @param w The z component of velocity.
424 !! @param Xh The SEM function space.
425 !! @param coef The SEM coefficients.
426 !! @param nelv The total number of elements.
427 !! @param gdim Number of geometric dimensions.
428 function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
429 type(space_t), intent(in) :: xh
430 type(coef_t), intent(in) :: coef
431 integer, intent(in) :: nelv, gdim
432 real(kind=rp), intent(in) :: dt
433 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: u, v, w
434 real(kind=rp) :: cfl
435 integer :: ierr
436
437 if (neko_bcknd_sx .eq. 1) then
438 cfl = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
439 else if (neko_bcknd_device .eq. 1) then
440 cfl = opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
441 else
442 cfl = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
443 end if
444
445 if (.not. neko_device_mpi) then
446 call mpi_allreduce(mpi_in_place, cfl, 1, &
447 mpi_real_precision, mpi_max, neko_comm, ierr)
448 end if
449
450 end function cfl
451
452 !! Compute the CFL number for compressible flows
453 !! @param dt The timestep.
454 !! @param max_wave_speed The precomputed maximum wave speed field.
455 !! @param Xh The SEM function space.
456 !! @param coef The SEM coefficients.
457 !! @param nelv The total number of elements.
458 !! @param gdim Number of geometric dimensions.
459 function cfl_compressible(dt, max_wave_speed, Xh, coef, nelv, gdim)
460 type(space_t), intent(in) :: xh
461 type(coef_t), intent(in) :: coef
462 integer, intent(in) :: nelv, gdim
463 real(kind=rp), intent(in) :: dt
464 real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz, nelv), intent(in) :: &
465 max_wave_speed
466 real(kind=rp) :: cfl_compressible
467
468 cfl_compressible = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
469 xh, coef, nelv, gdim)
470
471 end function cfl_compressible
472
485 subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
486 type(field_t), intent(in) :: u, v, w
487 type(coef_t), intent(in) :: coef
488 real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
489 real(kind=rp), intent(inout) :: s22(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
490 real(kind=rp), intent(inout) :: s33(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
491 real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
492 real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
493 real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
494
495 type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
496
497 integer :: nelv, lxyz
498
499 if (neko_bcknd_device .eq. 1) then
500 s11_d = device_get_ptr(s11)
501 s22_d = device_get_ptr(s22)
502 s33_d = device_get_ptr(s33)
503 s12_d = device_get_ptr(s12)
504 s23_d = device_get_ptr(s23)
505 s13_d = device_get_ptr(s13)
506 end if
507
508 nelv = u%msh%nelv
509 lxyz = u%Xh%lxyz
510
511 ! we use s11 as a work array here
512 call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
513 call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
514 if (neko_bcknd_device .eq. 1) then
515 call device_add2(s12_d, s11_d, nelv*lxyz)
516 else
517 call add2(s12, s11, nelv*lxyz)
518 end if
519
520 call dudxyz (s13, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
521 call dudxyz (s11, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
522 if (neko_bcknd_device .eq. 1) then
523 call device_add2(s13_d, s11_d, nelv*lxyz)
524 else
525 call add2(s13, s11, nelv*lxyz)
526 end if
527
528 call dudxyz (s23, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
529 call dudxyz (s11, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
530 if (neko_bcknd_device .eq. 1) then
531 call device_add2(s23_d, s11_d, nelv*lxyz)
532 else
533 call add2(s23, s11, nelv*lxyz)
534 end if
535
536 call dudxyz (s11, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
537 call dudxyz (s22, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
538 call dudxyz (s33, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
539
540 if (neko_bcknd_device .eq. 1) then
541 call device_cmult(s12_d, 0.5_rp, nelv*lxyz)
542 call device_cmult(s13_d, 0.5_rp, nelv*lxyz)
543 call device_cmult(s23_d, 0.5_rp, nelv*lxyz)
544 else
545 call cmult(s12, 0.5_rp, nelv*lxyz)
546 call cmult(s13, 0.5_rp, nelv*lxyz)
547 call cmult(s23, 0.5_rp, nelv*lxyz)
548 end if
549
550 end subroutine strain_rate
551
558 subroutine lambda2op(lambda2, u, v, w, coef)
559 type(coef_t), intent(in) :: coef
560 type(field_t), intent(inout) :: lambda2
561 type(field_t), intent(in) :: u, v, w
563 if (neko_bcknd_sx .eq. 1) then
564 call opr_sx_lambda2(lambda2, u, v, w, coef)
565 else if (neko_bcknd_device .eq. 1) then
566 call opr_device_lambda2(lambda2, u, v, w, coef)
567 else
568 call opr_cpu_lambda2(lambda2, u, v, w, coef)
569 end if
570
571 end subroutine lambda2op
572
583 subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
584 type(space_t), intent(inout) :: xh
585 type(coef_t), intent(inout) :: coef
586 type(field_t), intent(inout) :: cr, cs, ct
587 real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
588 intent(in) :: cx, cy, cz
589 type(c_ptr) :: cx_d, cy_d, cz_d
590
591 if (neko_bcknd_sx .eq. 1) then
592 call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
593 else if (neko_bcknd_xsmm .eq. 1) then
594 call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
595 else if (neko_bcknd_device .eq. 1) then
596 cx_d = device_get_ptr(cx)
597 cy_d = device_get_ptr(cy)
598 cz_d = device_get_ptr(cz)
599 call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
600 cx_d, cy_d, cz_d, xh, coef)
601 else
602 call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
603 end if
604
605 end subroutine set_convect_rst
606
622 subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
623 coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
624 type(space_t), intent(in) :: xh_gll
625 type(space_t), intent(inout) :: xh_gl
626 type(coef_t), intent(in) :: coef
627 type(coef_t), intent(inout) :: coef_gl
628 type(interpolator_t) :: gll_to_gl
629 real(kind=rp), intent(inout) :: tau, dtau
630 integer, intent(in) :: n, nel, n_gl
631 type(field_t), intent(inout) :: phi
632 type(field_list_t) :: conv_k1, conv_k23, conv_k4
633 real(kind=rp) :: c1, c2, c3
634 type(field_t), pointer :: u1, k1, k2, k3, k4
635 type(vector_t), pointer :: u1_gl
636 integer :: ind(6), i, e
637
638 call neko_scratch_registry%request_field(u1, ind(1), .false.)
639 call neko_scratch_registry%request_field(k1, ind(2), .false.)
640 call neko_scratch_registry%request_field(k2, ind(3), .false.)
641 call neko_scratch_registry%request_field(k3, ind(4), .false.)
642 call neko_scratch_registry%request_field(k4, ind(5), .false.)
643 call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)
644
645 c1 = 1.0_rp
646 c2 = -dtau/2.
647 c3 = -dtau
648
649 if (neko_bcknd_device .eq. 1) then
650
651 ! Stage 1:
652 call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
653 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
654 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
655 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
656 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
657 call device_col2(k1%x_d, coef%B_d, n)
658
659 ! Stage 2:
660 call device_add3s2(u1%x_d, phi%x_d, k1%x_d, c1, c2, n)
661 call device_invcol2(u1%x_d, coef%B_d, n)
662 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
663 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
664 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
665 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
666 call device_col2(k2%x_d, coef%B_d, n)
667
668 ! Stage 3:
669 call device_add3s2(u1%x_d, phi%x_d, k2%x_d, c1, c2, n)
670 call device_invcol2(u1%x_d, coef%B_d, n)
671 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
672 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
673 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
674 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
675 call device_col2(k3%x_d, coef%B_d, n)
676
677 ! Stage 4:
678 call device_add3s2(u1%x_d, phi%x_d, k3%x_d, c1, c3, n)
679 call device_invcol2(u1%x_d, coef%B_d, n)
680 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
681 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
682 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
683 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
684 call device_col2(k4%x_d, coef%B_d, n)
685
686 c1 = -dtau/6.
687 c2 = -dtau/3.
688
689 call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
690 c1, c2, c2, c1, n)
691
692 else
693
694 ! Stage 1:
695 call invcol3(u1%x, phi%x, coef%B, n)
696 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
697 call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
698 conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
699 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
700 call col2(k1%x, coef%B, n)
701
702 ! Stage 2:
703 call add3s2(u1%x, phi%x, k1%x, c1, c2, n)
704 call invcol2(u1%x, coef%B, n)
705 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
706 call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
707 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
708 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
709 call col2(k2%x, coef%B, n)
710
711 ! Stage 3:
712 call add3s2(u1%x, phi%x, k2%x, c1, c2, n)
713 call invcol2(u1%x, coef%B, n)
714 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
715 call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
716 conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
717 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
718 call col2(k3%x, coef%B, n)
719
720 ! Stage 4:
721 call add3s2(u1%x, phi%x, k3%x, c1, c3, n)
722 call invcol2(u1%x, coef%B, n)
723 call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
724 call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
725 conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
726 xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
727 call col2(k4%x, coef%B, n)
728
729 c1 = -dtau/6.
730 c2 = -dtau/3.
731 call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, c1, c2, c2, c1, n)
732 end if
733
734 call neko_scratch_registry%relinquish_field(ind)
735
736 end subroutine runge_kutta
737
738 subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
739 real(kind=rp), dimension(:), intent(inout) :: vx, vy, vz
740 integer, intent(in) :: idir
741 type(coef_t), intent(in) :: coef
742 if (coef%cyclic) then
743 if (neko_bcknd_device .eq. 1) then
744 call opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
745 else
746 call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
747 end if
748 end if
749 end subroutine rotate_cyc_r1
750
751 subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
752 real(kind=rp), dimension(:,:,:,:), intent(inout) :: vx, vy, vz
753 integer, intent(in) :: idir
754 type(coef_t), intent(in) :: coef
755 if (coef%cyclic) then
756 if (neko_bcknd_device .eq. 1) then
757 call opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
758 else
759 call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
760 end if
761 end if
762 end subroutine rotate_cyc_r4
763
764
765end module operators
766
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:51
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition .
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns .
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns .
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2 The values are stored in the field registry under the na...
Definition lambda2.f90:37
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:77
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c .
Definition math.f90:411
subroutine, public invcol2(a, b, n)
Vector division .
Definition math.f90:840
subroutine, public cadd(a, s, n)
Add a scalar to vector .
Definition math.f90:462
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public add2(a, b, n)
Vector addition .
Definition math.f90:726
subroutine, public invcol3(a, b, c, n)
Invert a vector .
Definition math.f90:626
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns .
Definition math.f90:895
subroutine, public col2(a, b, n)
Vector multiplication .
Definition math.f90:854
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns .
Definition math.f90:926
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
real(kind=rp) function, public cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
real(kind=rp) function, public cfl_compressible(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine, public grad(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine, public strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e 0.5 * du_i/dx_j + du_j/dx_i.
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine, public dudxyz(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
Definition operators.f90:92
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:128
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:313
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:178
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:238
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:340
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine, public opr_device_cdtp(dtx, x, dr, ds, dt, coef)
subroutine, public opr_device_rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function, public opr_device_cfl(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_dudxyz(du, u, dr, ds, dt, coef)
subroutine, public opr_device_opgrad(ux, uy, uz, u, coef)
subroutine, public opr_device_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
subroutine, public opr_device_lambda2(lambda2, u, v, w, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:102
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:371
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:297
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:242
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:418
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:92
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:146
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:469
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Defines a vector.
Definition vector.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63