Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
operators.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
37 use num_types, only : rp, i8
38 use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, &
39 opr_cpu_conv1, opr_cpu_convect_scalar, opr_cpu_cdtp, &
40 opr_cpu_dudxyz, opr_cpu_lambda2, opr_cpu_set_convect_rst, &
42 use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_opgrad, &
43 opr_sx_conv1, opr_sx_convect_scalar, opr_sx_cdtp, &
44 opr_sx_dudxyz, opr_sx_lambda2, opr_sx_set_convect_rst
52 use space, only : space_t
53 use coefs, only : coef_t
54 use field, only : field_t
55 use field_list, only : field_list_t
56 use field_math, only : field_rzero
58 use math, only : glsum, cmult, add2, add3s2, cadd, copy, col2, invcol2, &
65 use vector, only : vector_t
67 use mpi_f08, only : mpi_allreduce, mpi_in_place, mpi_max, mpi_sum
68 use, intrinsic :: iso_c_binding, only : c_ptr
69 use logger, only : neko_log
70 implicit none
71 private
72
76
!> Generic interface for the single-direction derivative operator.
!! Resolves to the raw-array, device-pointer or field variant depending on
!! the types of the actual arguments.
interface dudxyz
  module procedure dudxyz_r4, opr_device_dudxyz, dudxyz_f
end interface dudxyz
83
!> Generic interface for the divergence operator.
!! Resolves to the raw-array or the device-pointer variant.
interface div
  module procedure div_r4, div_d
end interface div
89
!> Generic interface for the gradient operator.
!! Resolves to the raw-array or the device-pointer variant.
interface grad
  module procedure grad_r4, grad_d
end interface grad
95
!> Generic interface for the CFL number computation.
!! Resolves to the raw-array, device-pointer or field variant.
interface cfl
  module procedure cfl_r4, cfl_d, cfl_f
end interface cfl
102
!> Generic interface for the CFL number computation of compressible flows.
!! Resolves to the raw-array, device-pointer or field variant.
interface cfl_compressible
  module procedure cfl_compressible_r4
  module procedure cfl_compressible_d
  module procedure cfl_compressible_f
end interface cfl_compressible
109
!> Generic interface for the cyclic rotation of a vector.
!! Resolves to the rank-1 array, rank-4 array, device-pointer or field
!! variant.
interface rotate_cyc
  module procedure rotate_cyc_r1, rotate_cyc_r4, rotate_cyc_d, rotate_cyc_f
end interface rotate_cyc
117
!> Generic interface for the strain-rate tensor computation.
!! Resolves to the raw-array, device-pointer or field variant.
interface strain_rate
  module procedure strain_rate_r4, strain_rate_d, strain_rate_f
end interface strain_rate
124contains
125
!> Backend dispatcher for `dudxyz` acting on raw rank-4 arrays.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. On the device
!! backend a deprecation notice is logged and the host arrays are mapped to
!! their device pointers via `device_get_ptr` before the device kernel runs.
!! @param du Array receiving the result of the kernel.
!! @param u Array that is differentiated.
!! @param dr First geometric factor array (callers in this file pass e.g.
!! `coef%drdx`).
!! @param ds Second geometric factor array (e.g. `coef%dsdx`).
!! @param dt Third geometric factor array (e.g. `coef%dtdx`).
!! @param coef The SEM coefficients, passed through to the kernel.
subroutine dudxyz_r4(du, u, dr, ds, dt, coef)
  real(kind=rp), contiguous, intent(inout) :: du(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: u(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: dr(:,:,:,:), ds(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: dt(:,:,:,:)
  type(coef_t), intent(in) :: coef
  type(c_ptr) :: du_dev, u_dev, dr_dev, ds_dev, dt_dev

  if (neko_bcknd_sx == 1) then
    call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
  else if (neko_bcknd_xsmm == 1) then
    call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
  else if (neko_bcknd_device == 1) then
    ! Implicit host-to-device lookup is deprecated; callers should pass
    ! device pointers directly.
    call neko_log%deprecated('Operator: dudxyz, implicit device', &
         '2.0.0', 'Please call opr_device_dudxyz instead.')

    du_dev = device_get_ptr(du)
    u_dev = device_get_ptr(u)
    dr_dev = device_get_ptr(dr)
    ds_dev = device_get_ptr(ds)
    dt_dev = device_get_ptr(dt)

    call opr_device_dudxyz(du_dev, u_dev, dr_dev, ds_dev, dt_dev, coef)
  else
    call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
  end if

end subroutine dudxyz_r4
159
!> Backend dispatcher for `dudxyz` acting on `field_t` objects.
!! @details The host data (`%x`) is handed to the SX, XSMM or CPU kernel,
!! while the device backend receives the device pointers (`%x_d`). Backends
!! are tested in the order SX, XSMM, device, CPU.
!! @param du Field receiving the result.
!! @param u Field that is differentiated.
!! @param dr First geometric factor field.
!! @param ds Second geometric factor field.
!! @param dt Third geometric factor field.
!! @param coef The SEM coefficients, passed through to the kernel.
subroutine dudxyz_f(du, u, dr, ds, dt, coef)
  type(field_t), intent(inout) :: du
  type(field_t), intent(in) :: u
  type(field_t), intent(in) :: dr, ds, dt
  type(coef_t), intent(in) :: coef

  if (neko_bcknd_sx == 1) then
    call opr_sx_dudxyz(du%x, u%x, dr%x, ds%x, dt%x, coef)
  else if (neko_bcknd_xsmm == 1) then
    call opr_xsmm_dudxyz(du%x, u%x, dr%x, ds%x, dt%x, coef)
  else if (neko_bcknd_device == 1) then
    call opr_device_dudxyz(du%x_d, u%x_d, dr%x_d, ds%x_d, dt%x_d, coef)
  else
    call opr_cpu_dudxyz(du%x, u%x, dr%x, ds%x, dt%x, coef)
  end if

end subroutine dudxyz_f
183
!> Divergence of a vector field given as raw rank-4 arrays.
!! @details Accumulates d(ux)/dx + d(uy)/dy + d(uz)/dz into `res`, using one
!! scratch field from `neko_scratch_registry` for the second and third term.
!! On the device backend a deprecation notice is logged and the accumulation
!! is done on the device pointer of `res`.
!! @param res Array receiving the divergence.
!! @param ux The x component of the vector field.
!! @param uy The y component of the vector field.
!! @param uz The z component of the vector field.
!! @param coef The SEM coefficients (provides the geometric factors).
subroutine div_r4(res, ux, uy, uz, coef)
  real(kind=rp), contiguous, intent(inout) :: res(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: ux(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: uy(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: uz(:,:,:,:)
  type(coef_t), intent(in), target :: coef
  type(field_t), pointer :: tmp
  type(c_ptr) :: res_dev
  integer :: tmp_idx
  logical :: on_device

  on_device = neko_bcknd_device == 1

  if (on_device) then
    call neko_log%deprecated('Operator: div, implicit device', &
         '2.0.0', 'Please call div_d instead.')
    res_dev = device_get_ptr(res)
  end if

  call neko_scratch_registry%request_field(tmp, tmp_idx, .false.)

  ! First term, dux / dx, goes straight into the result
  call dudxyz(res, ux, coef%drdx, coef%dsdx, coef%dtdx, coef)

  ! Second term, duy / dy, is added through the scratch field
  call dudxyz(tmp%x, uy, coef%drdy, coef%dsdy, coef%dtdy, coef)
  if (on_device) then
    call device_add2(res_dev, tmp%x_d, tmp%size())
  else
    call add2(res, tmp%x, tmp%size())
  end if

  ! Third term, duz / dz, is added through the scratch field
  call dudxyz(tmp%x, uz, coef%drdz, coef%dsdz, coef%dtdz, coef)
  if (on_device) then
    call device_add2(res_dev, tmp%x_d, tmp%size())
  else
    call add2(res, tmp%x, tmp%size())
  end if

  call neko_scratch_registry%relinquish_field(tmp_idx)

end subroutine div_r4
228
!> Divergence of a vector field given as device pointers.
!! @details Accumulates d(ux)/dx + d(uy)/dy + d(uz)/dz into `res_d`, using
!! the device storage of one scratch field from `neko_scratch_registry` for
!! the second and third term.
!! @param res_d Device pointer receiving the divergence.
!! @param ux_d Device pointer to the x component of the vector field.
!! @param uy_d Device pointer to the y component of the vector field.
!! @param uz_d Device pointer to the z component of the vector field.
!! @param coef The SEM coefficients (provides the device geometric factors).
subroutine div_d(res_d, ux_d, uy_d, uz_d, coef)
  type(c_ptr), intent(inout) :: res_d
  type(c_ptr), intent(in) :: ux_d
  type(c_ptr), intent(in) :: uy_d
  type(c_ptr), intent(in) :: uz_d
  type(coef_t), intent(in), target :: coef
  type(field_t), pointer :: tmp
  integer :: tmp_idx

  call neko_scratch_registry%request_field(tmp, tmp_idx, .false.)

  ! First term, dux / dx, goes straight into the result
  call dudxyz(res_d, ux_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)

  ! Second term, duy / dy
  call dudxyz(tmp%x_d, uy_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
  call device_add2(res_d, tmp%x_d, tmp%size())

  ! Third term, duz / dz
  call dudxyz(tmp%x_d, uz_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
  call device_add2(res_d, tmp%x_d, tmp%size())

  call neko_scratch_registry%relinquish_field(tmp_idx)

end subroutine div_d
258
!> Gradient of a scalar field given as raw rank-4 arrays.
!! @details Makes one `dudxyz` call per direction, each with the matching
!! set of geometric factors from `coef`.
!! @param ux Array receiving the x component of the gradient.
!! @param uy Array receiving the y component of the gradient.
!! @param uz Array receiving the z component of the gradient.
!! @param u The scalar field.
!! @param coef The SEM coefficients.
subroutine grad_r4(ux, uy, uz, u, coef)
  type(coef_t), intent(in) :: coef
  real(kind=rp), contiguous, intent(inout) :: ux(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: uy(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: uz(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: u(:,:,:,:)

  ! d/dx
  call dudxyz(ux, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
  ! d/dy
  call dudxyz(uy, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
  ! d/dz
  call dudxyz(uz, u, coef%drdz, coef%dsdz, coef%dtdz, coef)

end subroutine grad_r4
277
!> Gradient of a scalar field given as device pointers.
!! @details Makes one `dudxyz` call per direction, each with the matching
!! set of device geometric factors from `coef`.
!! @param ux_d Device pointer receiving the x component of the gradient.
!! @param uy_d Device pointer receiving the y component of the gradient.
!! @param uz_d Device pointer receiving the z component of the gradient.
!! @param u_d Device pointer to the scalar field.
!! @param coef The SEM coefficients.
subroutine grad_d(ux_d, uy_d, uz_d, u_d, coef)
  type(coef_t), intent(in) :: coef
  type(c_ptr), intent(inout) :: ux_d
  type(c_ptr), intent(inout) :: uy_d
  type(c_ptr), intent(inout) :: uz_d
  type(c_ptr), intent(in) :: u_d

  ! d/dx
  call dudxyz(ux_d, u_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
  ! d/dy
  call dudxyz(uy_d, u_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
  ! d/dz
  call dudxyz(uz_d, u_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)

end subroutine grad_d
294
!> Backend dispatcher for the `opgrad` operator.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. On the device
!! backend the host arrays are mapped to device pointers via
!! `device_get_ptr`.
!! @note The element block limits `es` and `ee` are only forwarded to the
!! CPU kernel; the SX, XSMM and device kernels are called without them.
!! @param ux Array receiving the x component of the result.
!! @param uy Array receiving the y component of the result.
!! @param uz Array receiving the z component of the result.
!! @param u The input field.
!! @param coef The SEM coefficients.
!! @param es Optional start of the element block, defaults to 1.
!! @param ee Optional end of the element block, defaults to
!! `coef%msh%nelv`.
subroutine opgrad(ux, uy, uz, u, coef, es, ee)
  type(coef_t), intent(in) :: coef
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: ux
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uy
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: uz
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: u
  integer, optional, intent(in) :: es, ee
  integer :: eblk_start, eblk_end
  type(c_ptr) :: ux_d, uy_d, uz_d, u_d

  ! Resolve the optional element block, falling back to the full range
  if (present(es)) then
    eblk_start = es
  else
    eblk_start = 1
  end if

  if (present(ee)) then
    eblk_end = ee
  else
    eblk_end = coef%msh%nelv
  end if

  if (neko_bcknd_sx .eq. 1) then
    call opr_sx_opgrad(ux, uy, uz, u, coef)
  else if (neko_bcknd_xsmm .eq. 1) then
    call opr_xsmm_opgrad(ux, uy, uz, u, coef)
  else if (neko_bcknd_device .eq. 1) then
    ux_d = device_get_ptr(ux)
    uy_d = device_get_ptr(uy)
    uz_d = device_get_ptr(uz)
    u_d = device_get_ptr(u)
    call opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
  else
    call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
  end if

end subroutine opgrad
344
!> Shift a vector by the negative of its sum divided by a global point count.
!! @details Computes `c = glsum(x) / glb_n_points` and adds `-c` to every
!! entry of `x`. On the device backend a deprecation notice is logged and
!! the device counterparts of `glsum` and `cadd` are applied to the device
!! pointer of `x`.
!! @param x The vector, modified in place.
!! @param glb_n_points The divisor (presumably the global number of points;
!! confirm against callers).
!! @param n The local size of `x`.
subroutine ortho(x, glb_n_points, n)
  integer, intent(in) :: n
  integer(kind=i8), intent(in) :: glb_n_points
  real(kind=rp), intent(inout) :: x(n)
  real(kind=rp) :: mean_val
  type(c_ptr) :: x_dev

  if (neko_bcknd_device == 1) then
    call neko_log%deprecated('Operator: ortho, implicit device', &
         '2.0.0', 'Please call device_ortho instead.')

    x_dev = device_get_ptr(x)
    mean_val = device_glsum(x_dev, n) / glb_n_points
    call device_cadd(x_dev, -mean_val, n)
  else
    mean_val = glsum(x, n) / glb_n_points
    call cadd(x, -mean_val, n)
  end if

end subroutine ortho
369
!> Backend dispatcher for the `cdtp` operator.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. On the device
!! backend the host arrays are mapped to device pointers via
!! `device_get_ptr`.
!! @note The element block limits `es` and `ee` are only forwarded to the
!! CPU kernel; the SX, XSMM and device kernels are called without them.
!! @param dtx Array receiving the result.
!! @param x The input field (declared `intent(inout)`; whether a kernel
!! modifies it cannot be seen from this routine).
!! @param dr First geometric factor array.
!! @param ds Second geometric factor array.
!! @param dt Third geometric factor array.
!! @param coef The SEM coefficients.
!! @param es Optional start of the element block, defaults to 1.
!! @param ee Optional end of the element block, defaults to
!! `coef%msh%nelv`.
subroutine cdtp(dtx, x, dr, ds, dt, coef, es, ee)
  type(coef_t), intent(in) :: coef
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: dtx
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(inout) :: x
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dr
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: ds
  real(kind=rp), dimension(coef%Xh%lxyz, coef%msh%nelv), intent(in) :: dt
  integer, optional, intent(in) :: es, ee
  integer :: eblk_start, eblk_end
  type(c_ptr) :: dtx_d, x_d, dr_d, ds_d, dt_d

  ! Resolve the optional element block, falling back to the full range
  if (present(es)) then
    eblk_start = es
  else
    eblk_start = 1
  end if

  if (present(ee)) then
    eblk_end = ee
  else
    eblk_end = coef%msh%nelv
  end if

  if (neko_bcknd_sx .eq. 1) then
    call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
  else if (neko_bcknd_xsmm .eq. 1) then
    call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
  else if (neko_bcknd_device .eq. 1) then
    dtx_d = device_get_ptr(dtx)
    x_d = device_get_ptr(x)
    dr_d = device_get_ptr(dr)
    ds_d = device_get_ptr(ds)
    dt_d = device_get_ptr(dt)
    call opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
  else
    call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef, eblk_start, eblk_end)
  end if

end subroutine cdtp
420
!> Backend dispatcher for the `conv1` operator.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. On the device
!! backend the host arrays are mapped to device pointers via
!! `device_get_ptr`.
!! @note The element block limits `es` and `ee` are only forwarded to the
!! CPU kernel; the other kernels receive `nelv` (and `gdim`) from the mesh.
!! @param du Array receiving the result.
!! @param u The field the operator is applied to.
!! @param vx The x component of the velocity handed to the kernel.
!! @param vy The y component of the velocity handed to the kernel.
!! @param vz The z component of the velocity handed to the kernel.
!! @param Xh The function space.
!! @param coef The SEM coefficients.
!! @param es Optional start of the element block, defaults to 1.
!! @param ee Optional end of the element block, defaults to
!! `coef%msh%nelv`.
subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  real(kind=rp), intent(inout) :: du(xh%lxyz, coef%msh%nelv)
  real(kind=rp), intent(in) :: u(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
  real(kind=rp), intent(in) :: vx(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
  real(kind=rp), intent(in) :: vy(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
  real(kind=rp), intent(in) :: vz(xh%lx, xh%ly, xh%lz, coef%msh%nelv)
  integer, optional, intent(in) :: es, ee
  integer :: eblk_end, eblk_start
  type(c_ptr) :: du_d, u_d, vx_d, vy_d, vz_d

  associate(nelv => coef%msh%nelv, gdim => coef%msh%gdim)
    ! Resolve the optional element block, falling back to the full range
    if (present(es)) then
      eblk_start = es
    else
      eblk_start = 1
    end if

    if (present(ee)) then
      eblk_end = ee
    else
      eblk_end = nelv
    end if

    if (neko_bcknd_sx .eq. 1) then
      call opr_sx_conv1(du, u, vx, vy, vz, xh, coef, nelv)
    else if (neko_bcknd_xsmm .eq. 1) then
      call opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
    else if (neko_bcknd_device .eq. 1) then
      du_d = device_get_ptr(du)
      u_d = device_get_ptr(u)
      vx_d = device_get_ptr(vx)
      vy_d = device_get_ptr(vy)
      vz_d = device_get_ptr(vz)
      call opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
    else
      call opr_cpu_conv1(du, u, vx, vy, vz, xh, coef, eblk_start, eblk_end)
    end if
  end associate

end subroutine conv1
473
!> Backend dispatcher for the `convect_scalar` operator.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. The host
!! kernels receive the host data of `cr`, `cs` and `ct`. The device kernel
!! receives their device pointers together with the device pointer of `u`,
!! while `du` is still passed as a host array.
!! @param du Array receiving the result, shaped on the GLL space.
!! @param u The field the operator is applied to, shaped on the GL space.
!! @param cr First convecting field component.
!! @param cs Second convecting field component.
!! @param ct Third convecting field component.
!! @param Xh_GLL The GLL function space.
!! @param Xh_GL The GL function space.
!! @param coef_GLL The SEM coefficients on the GLL space.
!! @param coef_GL The SEM coefficients on the GL space.
!! @param GLL_to_GL Interpolator between the two spaces, passed through to
!! the kernel.
subroutine convect_scalar(du, u, cr, cs, ct, Xh_GLL, Xh_GL, coef_GLL, &
     coef_GL, GLL_to_GL)
  type(space_t), intent(in) :: Xh_GL
  type(space_t), intent(in) :: Xh_GLL
  type(coef_t), intent(in) :: coef_GLL
  type(coef_t), intent(in) :: coef_GL
  type(interpolator_t), intent(inout) :: GLL_to_GL
  real(kind=rp), intent(inout) :: &
       du(xh_gll%lx, xh_gll%ly, xh_gll%lz, coef_gl%msh%nelv)
  real(kind=rp), intent(inout) :: &
       u(xh_gl%lx, xh_gl%lx, xh_gl%lx, coef_gl%msh%nelv)
  type(field_t), intent(inout) :: cr
  type(field_t), intent(inout) :: cs
  type(field_t), intent(inout) :: ct
  type(c_ptr) :: u_dev

  if (neko_bcknd_sx == 1) then
    call opr_sx_convect_scalar(du, u, cr%x, cs%x, ct%x, &
         xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
  else if (neko_bcknd_xsmm == 1) then
    call opr_xsmm_convect_scalar(du, u, cr%x, cs%x, ct%x, &
         xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
  else if (neko_bcknd_device == 1) then
    u_dev = device_get_ptr(u)
    call opr_device_convect_scalar(du, u_dev, cr%x_d, cs%x_d, ct%x_d, &
         xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
  else
    call opr_cpu_convect_scalar(du, u, cr%x, cs%x, ct%x, &
         xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
  end if

end subroutine convect_scalar
521
!> Compute the curl of a vector field.
!! @details Dispatches to the SX, XSMM, device or CPU kernel, in that order
!! of precedence. The host kernels operate on the `%x` data of the fields;
!! the device kernel receives the field objects themselves.
!! @param w1 Will store the x component of the curl.
!! @param w2 Will store the y component of the curl.
!! @param w3 Will store the z component of the curl.
!! @param u1 The x component of the vector field.
!! @param u2 The y component of the vector field.
!! @param u3 The z component of the vector field.
!! @param work1 A temporary array for computations.
!! @param work2 A temporary array for computations.
!! @param coef The SEM coefficients.
!! @param event Optional handle that is forwarded to the device kernel only;
!! the other backends ignore it.
subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
  type(field_t), intent(inout) :: w1, w2, w3
  type(field_t), intent(in) :: u1, u2, u3
  type(field_t), intent(inout) :: work1, work2
  type(coef_t), intent(in) :: coef
  type(c_ptr), optional, intent(inout) :: event

  if (neko_bcknd_sx == 1) then
    call opr_sx_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
         work1%x, work2%x, coef)
  else if (neko_bcknd_xsmm == 1) then
    call opr_xsmm_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
         work1%x, work2%x, coef)
  else if (neko_bcknd_device == 1) then
    call opr_device_curl(w1, w2, w3, u1, u2, u3, &
         work1, work2, coef, event)
  else
    call opr_cpu_curl(w1%x, w2%x, w3%x, u1%x, u2%x, u3%x, &
         work1%x, work2%x, coef)
  end if

end subroutine curl
559
!> Compute the CFL number from velocity components given as raw arrays.
!! @details Dispatches to the SX, device or CPU kernel, in that order of
!! precedence. On the device backend a deprecation notice is logged and the
!! host arrays are mapped to device pointers. Unless `neko_device_mpi` is
!! set, the local value is then reduced with `mpi_max` over `neko_comm`.
!! @param dt The timestep.
!! @param u The x component of velocity.
!! @param v The y component of velocity.
!! @param w The z component of velocity.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions (not passed to the SX kernel).
!! @return The CFL number.
function cfl_r4(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl_val)
  real(kind=rp), intent(in) :: dt
  real(kind=rp), contiguous, intent(in) :: u(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: v(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: w(:,:,:,:)
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val
  integer :: ierr
  type(c_ptr) :: u_dev, v_dev, w_dev

  if (neko_bcknd_sx == 1) then
    cfl_val = opr_sx_cfl(dt, u, v, w, xh, coef, nelv)
  else if (neko_bcknd_device == 1) then
    call neko_log%deprecated('Operator: cfl_r4, implicit device', &
         '2.0.0', 'Please call cfl_d instead.')

    u_dev = device_get_ptr(u)
    v_dev = device_get_ptr(v)
    w_dev = device_get_ptr(w)

    cfl_val = opr_device_cfl(dt, u_dev, v_dev, w_dev, xh, coef, nelv, gdim)
  else
    cfl_val = opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
  end if

  ! Take the maximum over all ranks
  if (.not. neko_device_mpi) then
    call mpi_allreduce(mpi_in_place, cfl_val, 1, &
         mpi_real_precision, mpi_max, neko_comm, ierr)
  end if

end function cfl_r4
600
!> Compute the CFL number from velocity components given as device pointers.
!! @details Calls the device kernel and, unless `neko_device_mpi` is set,
!! reduces the local value with `mpi_max` over `neko_comm`.
!! @param dt The timestep.
!! @param u_d Device pointer to the x component of velocity.
!! @param v_d Device pointer to the y component of velocity.
!! @param w_d Device pointer to the z component of velocity.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions.
!! @return The CFL number.
function cfl_d(dt, u_d, v_d, w_d, Xh, coef, nelv, gdim) result(cfl_val)
  real(kind=rp), intent(in) :: dt
  type(c_ptr), intent(in) :: u_d
  type(c_ptr), intent(in) :: v_d
  type(c_ptr), intent(in) :: w_d
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val
  integer :: ierr

  cfl_val = opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)

  ! Take the maximum over all ranks
  if (.not. neko_device_mpi) then
    call mpi_allreduce(mpi_in_place, cfl_val, 1, &
         mpi_real_precision, mpi_max, neko_comm, ierr)
  end if

end function cfl_d
618
!> Compute the CFL number from velocity components given as fields.
!! @details Dispatches to the SX, device or CPU kernel, in that order of
!! precedence. The host kernels use the `%x` data and the device kernel uses
!! the `%x_d` pointers. Unless `neko_device_mpi` is set, the local value is
!! then reduced with `mpi_max` over `neko_comm`.
!! @param dt The timestep.
!! @param u The x component of velocity.
!! @param v The y component of velocity.
!! @param w The z component of velocity.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions (not passed to the SX kernel).
!! @return The CFL number.
function cfl_f(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl_val)
  real(kind=rp), intent(in) :: dt
  type(field_t), intent(in) :: u
  type(field_t), intent(in) :: v
  type(field_t), intent(in) :: w
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val
  integer :: ierr

  if (neko_bcknd_sx == 1) then
    cfl_val = opr_sx_cfl(dt, u%x, v%x, w%x, xh, coef, nelv)
  else if (neko_bcknd_device == 1) then
    cfl_val = opr_device_cfl(dt, u%x_d, v%x_d, w%x_d, xh, coef, nelv, gdim)
  else
    cfl_val = opr_cpu_cfl(dt, u%x, v%x, w%x, xh, coef, nelv, gdim)
  end if

  ! Take the maximum over all ranks
  if (.not. neko_device_mpi) then
    call mpi_allreduce(mpi_in_place, cfl_val, 1, &
         mpi_real_precision, mpi_max, neko_comm, ierr)
  end if

end function cfl_f
642
!> Compute the CFL number for compressible flows (raw-array variant).
!! @details Calls the generic `cfl` with `max_wave_speed` supplied as all
!! three velocity components.
!! @param dt The timestep.
!! @param max_wave_speed The precomputed maximum wave speed field.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions.
!! @return The CFL number.
function cfl_compressible_r4(dt, max_wave_speed, Xh, coef, nelv, gdim) &
     result(cfl_val)
  real(kind=rp), intent(in) :: dt
  real(kind=rp), contiguous, intent(in) :: max_wave_speed(:,:,:,:)
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val

  cfl_val = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
       xh, coef, nelv, gdim)

end function cfl_compressible_r4
662
!> Compute the CFL number for compressible flows (device-pointer variant).
!! @details Calls the generic `cfl` with `max_wave_speed` supplied as all
!! three velocity components.
!! @param dt The timestep.
!! @param max_wave_speed Device pointer to the precomputed maximum wave
!! speed field.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions.
!! @return The CFL number.
function cfl_compressible_d(dt, max_wave_speed, Xh, coef, nelv, gdim) &
     result(cfl_val)
  real(kind=rp), intent(in) :: dt
  type(c_ptr), intent(in) :: max_wave_speed
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val

  cfl_val = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
       xh, coef, nelv, gdim)

end function cfl_compressible_d
682
!> Compute the CFL number for compressible flows (field variant).
!! @details Calls the generic `cfl` with `max_wave_speed` supplied as all
!! three velocity components.
!! @param dt The timestep.
!! @param max_wave_speed The precomputed maximum wave speed field.
!! @param Xh The SEM function space.
!! @param coef The SEM coefficients.
!! @param nelv The total number of elements.
!! @param gdim Number of geometric dimensions.
!! @return The CFL number.
function cfl_compressible_f(dt, max_wave_speed, Xh, coef, nelv, gdim) &
     result(cfl_val)
  real(kind=rp), intent(in) :: dt
  type(field_t), intent(in) :: max_wave_speed
  type(space_t), intent(in) :: xh
  type(coef_t), intent(in) :: coef
  integer, intent(in) :: nelv, gdim
  real(kind=rp) :: cfl_val

  cfl_val = cfl(dt, max_wave_speed, max_wave_speed, max_wave_speed, &
       xh, coef, nelv, gdim)

end function cfl_compressible_f
702
!> Compute the strain-rate tensor from velocity components given as raw
!! rank-4 arrays.
!! @details Each off-diagonal entry is half the sum of two cross
!! derivatives, e.g. `s12 = 0.5 * (du/dy + dv/dx)`; the diagonal entries are
!! `du/dx`, `dv/dy` and `dw/dz`. `s11` is used as scratch storage while the
!! off-diagonal entries are assembled and is only given its final value
!! afterwards. On the device backend a deprecation notice is logged, the
!! host arrays are mapped to device pointers and the work is delegated to
!! `strain_rate_d`.
!! @param s11 Receives `du/dx`.
!! @param s22 Receives `dv/dy`.
!! @param s33 Receives `dw/dz`.
!! @param s12 Receives `0.5 * (du/dy + dv/dx)`.
!! @param s13 Receives `0.5 * (du/dz + dw/dx)`.
!! @param s23 Receives `0.5 * (dv/dz + dw/dy)`.
!! @param u The x component of velocity.
!! @param v The y component of velocity.
!! @param w The z component of velocity.
!! @param coef The SEM coefficients.
subroutine strain_rate_r4(s11, s22, s33, s12, s13, s23, u, v, w, coef)
  real(kind=rp), contiguous, intent(inout) :: s11(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: s22(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: s33(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: s12(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: s13(:,:,:,:)
  real(kind=rp), contiguous, intent(inout) :: s23(:,:,:,:)
  real(kind=rp), contiguous, intent(in) :: u(:,:,:,:), v(:,:,:,:), w(:,:,:,:)
  type(coef_t), intent(in) :: coef

  type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d, u_d, v_d, w_d

  integer :: nelv, lxyz

  nelv = coef%msh%nelv
  lxyz = coef%Xh%lxyz

  if (neko_bcknd_device .eq. 1) then
    call neko_log%deprecated('Operator: strain_rate_r4, implicit device', &
         '2.0.0', 'Please call strain_rate_d instead.')
    s11_d = device_get_ptr(s11)
    s22_d = device_get_ptr(s22)
    s33_d = device_get_ptr(s33)
    s12_d = device_get_ptr(s12)
    s23_d = device_get_ptr(s23)
    s13_d = device_get_ptr(s13)
    u_d = device_get_ptr(u)
    v_d = device_get_ptr(v)
    w_d = device_get_ptr(w)

    ! The device-pointer variant runs exactly the sequence this branch
    ! used to spell out inline.
    call strain_rate_d(s11_d, s22_d, s33_d, s12_d, s13_d, s23_d, &
         u_d, v_d, w_d, coef)
  else
    ! s12 = du/dy + dv/dx (s11 is scratch)
    call dudxyz(s12, u, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call dudxyz(s11, v, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call add2(s12, s11, nelv*lxyz)

    ! s13 = du/dz + dw/dx (s11 is scratch)
    call dudxyz(s13, u, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call dudxyz(s11, w, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call add2(s13, s11, nelv*lxyz)

    ! s23 = dv/dz + dw/dy (s11 is scratch)
    call dudxyz(s23, v, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call dudxyz(s11, w, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call add2(s23, s11, nelv*lxyz)

    ! Diagonal entries, then halve the off-diagonal sums
    call dudxyz(s11, u, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call dudxyz(s22, v, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call dudxyz(s33, w, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call cmult(s12, 0.5_rp, nelv*lxyz)
    call cmult(s13, 0.5_rp, nelv*lxyz)
    call cmult(s23, 0.5_rp, nelv*lxyz)
  end if

end subroutine strain_rate_r4
785
!> Compute the strain-rate tensor from velocity components given as device
!! pointers.
!! @details Each off-diagonal entry is half the sum of two cross
!! derivatives, e.g. `s12 = 0.5 * (du/dy + dv/dx)`; the diagonal entries are
!! `du/dx`, `dv/dy` and `dw/dz`. `s11_d` is used as scratch storage while
!! the off-diagonal entries are assembled and is only given its final value
!! afterwards.
!! @param s11_d Device pointer receiving `du/dx`.
!! @param s22_d Device pointer receiving `dv/dy`.
!! @param s33_d Device pointer receiving `dw/dz`.
!! @param s12_d Device pointer receiving `0.5 * (du/dy + dv/dx)`.
!! @param s13_d Device pointer receiving `0.5 * (du/dz + dw/dx)`.
!! @param s23_d Device pointer receiving `0.5 * (dv/dz + dw/dy)`.
!! @param u_d Device pointer to the x component of velocity.
!! @param v_d Device pointer to the y component of velocity.
!! @param w_d Device pointer to the z component of velocity.
!! @param coef The SEM coefficients.
subroutine strain_rate_d(s11_d, s22_d, s33_d, s12_d, s13_d, s23_d, &
     u_d, v_d, w_d, coef)
  type(c_ptr), intent(inout) :: s11_d, s22_d, s33_d
  type(c_ptr), intent(inout) :: s12_d, s13_d, s23_d
  type(c_ptr), intent(in) :: u_d, v_d, w_d
  type(coef_t), intent(in) :: coef

  integer :: n_pts

  n_pts = coef%msh%nelv * coef%Xh%lxyz

  ! s12 = du/dy + dv/dx (s11 is scratch)
  call dudxyz(s12_d, u_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
  call dudxyz(s11_d, v_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
  call device_add2(s12_d, s11_d, n_pts)

  ! s13 = du/dz + dw/dx (s11 is scratch)
  call dudxyz(s13_d, u_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
  call dudxyz(s11_d, w_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
  call device_add2(s13_d, s11_d, n_pts)

  ! s23 = dv/dz + dw/dy (s11 is scratch)
  call dudxyz(s23_d, v_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
  call dudxyz(s11_d, w_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
  call device_add2(s23_d, s11_d, n_pts)

  ! Diagonal entries, then halve the off-diagonal sums
  call dudxyz(s11_d, u_d, coef%drdx_d, coef%dsdx_d, coef%dtdx_d, coef)
  call dudxyz(s22_d, v_d, coef%drdy_d, coef%dsdy_d, coef%dtdy_d, coef)
  call dudxyz(s33_d, w_d, coef%drdz_d, coef%dsdz_d, coef%dtdz_d, coef)
  call device_cmult(s12_d, 0.5_rp, n_pts)
  call device_cmult(s13_d, 0.5_rp, n_pts)
  call device_cmult(s23_d, 0.5_rp, n_pts)

end subroutine strain_rate_d
829
!> Compute the strain-rate tensor from velocity components given as fields.
!! @details Each off-diagonal entry is half the sum of two cross
!! derivatives, e.g. `s12 = 0.5 * (du/dy + dv/dx)`; the diagonal entries are
!! `du/dx`, `dv/dy` and `dw/dz`. `s11` is used as scratch storage while the
!! off-diagonal entries are assembled and is only given its final value
!! afterwards. On the device backend the work is delegated to
!! `strain_rate_d` using the device pointers of the fields.
!! @param s11 Receives `du/dx`.
!! @param s22 Receives `dv/dy`.
!! @param s33 Receives `dw/dz`.
!! @param s12 Receives `0.5 * (du/dy + dv/dx)`.
!! @param s13 Receives `0.5 * (du/dz + dw/dx)`.
!! @param s23 Receives `0.5 * (dv/dz + dw/dy)`.
!! @param u The x component of velocity.
!! @param v The y component of velocity.
!! @param w The z component of velocity.
!! @param coef The SEM coefficients.
subroutine strain_rate_f(s11, s22, s33, s12, s13, s23, u, v, w, coef)
  type(field_t), intent(inout) :: s11, s22, s33, s12, s13, s23
  type(field_t), intent(in) :: u, v, w
  type(coef_t), intent(in) :: coef

  integer :: n
  n = coef%Xh%lxyz * coef%msh%nelv

  if (neko_bcknd_device .eq. 1) then
    ! The device-pointer variant runs exactly the sequence this branch
    ! used to spell out inline.
    call strain_rate_d(s11%x_d, s22%x_d, s33%x_d, &
         s12%x_d, s13%x_d, s23%x_d, u%x_d, v%x_d, w%x_d, coef)
  else
    ! s12 = du/dy + dv/dx (s11 is scratch)
    call dudxyz(s12%x, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call dudxyz(s11%x, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call add2(s12%x, s11%x, n)

    ! s13 = du/dz + dw/dx (s11 is scratch)
    call dudxyz(s13%x, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call dudxyz(s11%x, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call add2(s13%x, s11%x, n)

    ! s23 = dv/dz + dw/dy (s11 is scratch)
    call dudxyz(s23%x, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call dudxyz(s11%x, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call add2(s23%x, s11%x, n)

    ! Diagonal entries, then halve the off-diagonal sums
    call dudxyz(s11%x, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
    call dudxyz(s22%x, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
    call dudxyz(s33%x, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
    call cmult(s12%x, 0.5_rp, n)
    call cmult(s13%x, 0.5_rp, n)
    call cmult(s23%x, 0.5_rp, n)
  end if

end subroutine strain_rate_f
891
!> Backend dispatcher for the `lambda2` operator.
!! @details Forwards the call to the SX, device or CPU kernel, in that order
!! of precedence. The host kernels use the `%x` data of the fields and the
!! device kernel uses the `%x_d` pointers.
!! @param lambda2 Field receiving the result.
!! @param u The x component of velocity.
!! @param v The y component of velocity.
!! @param w The z component of velocity.
!! @param coef The SEM coefficients.
subroutine lambda2op(lambda2, u, v, w, coef)
  type(coef_t), intent(in) :: coef
  type(field_t), intent(inout) :: lambda2
  type(field_t), intent(in) :: u
  type(field_t), intent(in) :: v
  type(field_t), intent(in) :: w

  if (neko_bcknd_sx == 1) then
    call opr_sx_lambda2(lambda2%x, u%x, v%x, w%x, coef)
  else if (neko_bcknd_device == 1) then
    call opr_device_lambda2(lambda2%x_d, u%x_d, v%x_d, w%x_d, coef)
  else
    call opr_cpu_lambda2(lambda2%x, u%x, v%x, w%x, coef)
  end if

end subroutine lambda2op
912
!> Backend dispatcher for the `set_convect_rst` operator.
!! @details Forwards the call to the SX, XSMM, device or CPU kernel, chosen
!! in that order of precedence from the `neko_bcknd_*` flags. The host
!! kernels write to the `%x` data of `cr`, `cs` and `ct`. The device kernel
!! receives their `%x_d` pointers and the device pointers of `cx`, `cy` and
!! `cz` obtained via `device_get_ptr`.
!! @param cr First output field.
!! @param cs Second output field.
!! @param ct Third output field.
!! @param cx First input array.
!! @param cy Second input array.
!! @param cz Third input array.
!! @param Xh The function space.
!! @param coef The SEM coefficients.
subroutine set_convect_rst(cr, cs, ct, cx, cy, cz, Xh, coef)
  type(space_t), intent(inout) :: xh
  type(coef_t), intent(inout) :: coef
  type(field_t), intent(inout) :: cr
  type(field_t), intent(inout) :: cs
  type(field_t), intent(inout) :: ct
  real(kind=rp), dimension(Xh%lxyz, coef%msh%nelv), &
       intent(in) :: cx, cy, cz
  type(c_ptr) :: cx_dev, cy_dev, cz_dev

  if (neko_bcknd_sx == 1) then
    call opr_sx_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
  else if (neko_bcknd_xsmm == 1) then
    call opr_xsmm_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
  else if (neko_bcknd_device == 1) then
    cx_dev = device_get_ptr(cx)
    cy_dev = device_get_ptr(cy)
    cz_dev = device_get_ptr(cz)
    call opr_device_set_convect_rst(cr%x_d, cs%x_d, ct%x_d, &
         cx_dev, cy_dev, cz_dev, xh, coef)
  else
    call opr_cpu_set_convect_rst(cr%x, cs%x, ct%x, cx, cy, cz, xh, coef)
  end if

end subroutine set_convect_rst
946
!> Advance `phi` by one four-stage Runge-Kutta step of size `dtau`.
!! @details Every stage divides the current state by `coef%B`, maps it to
!! the GL space with `GLL_to_GL`, applies `convect_scalar`, and multiplies
!! the result by `coef%B`. Stage 1 uses `conv_k1`, stages 2 and 3 use
!! `conv_k23` and stage 4 uses `conv_k4`. The stage results are combined as
!! `phi = phi - dtau/6*k1 - dtau/3*k2 - dtau/3*k3 - dtau/6*k4`.
!! Five scratch fields and one scratch vector of length `n_GL` are taken
!! from `neko_scratch_registry` and released before returning.
!! @param phi The field to advance, updated in place.
!! @param conv_k1 Convecting field list for stage 1 (items 1 to 3 are used).
!! @param conv_k23 Convecting field list for stages 2 and 3.
!! @param conv_k4 Convecting field list for stage 4.
!! @param Xh_GLL The GLL function space.
!! @param Xh_GL The GL function space.
!! @param coef The SEM coefficients on the GLL space.
!! @param coef_GL The SEM coefficients on the GL space.
!! @param GLL_to_GL Interpolator used to map the state to the GL space.
!! @param tau Not referenced by this routine.
!! @param dtau The step size.
!! @param n Number of entries the element-wise vector operations act on.
!! @param nel Number of elements passed to the interpolator.
!! @param n_GL Size of the scratch vector on the GL space.
subroutine runge_kutta(phi, conv_k1, conv_k23, conv_k4, Xh_GLL, Xh_GL, &
     coef, coef_GL, GLL_to_GL, tau, dtau, n, nel, n_GL)
  type(space_t), intent(in) :: xh_gll
  type(space_t), intent(inout) :: xh_gl
  type(coef_t), intent(in) :: coef
  type(coef_t), intent(inout) :: coef_gl
  type(interpolator_t) :: gll_to_gl
  real(kind=rp), intent(inout) :: tau, dtau
  integer, intent(in) :: n, nel, n_gl
  type(field_t), intent(inout) :: phi
  type(field_list_t) :: conv_k1, conv_k23, conv_k4
  ! Stage coefficients: intermediate state = one * phi + step * k
  real(kind=rp) :: one, half_step, full_step
  ! Weights of the final combination of k1..k4
  real(kind=rp) :: w_outer, w_inner
  type(field_t), pointer :: u1, k1, k2, k3, k4
  type(vector_t), pointer :: u1_gl
  integer :: ind(6)

  call neko_scratch_registry%request_field(u1, ind(1), .false.)
  call neko_scratch_registry%request_field(k1, ind(2), .false.)
  call neko_scratch_registry%request_field(k2, ind(3), .false.)
  call neko_scratch_registry%request_field(k3, ind(4), .false.)
  call neko_scratch_registry%request_field(k4, ind(5), .false.)
  call neko_scratch_registry%request_vector(u1_gl, ind(6), n_gl, .false.)

  one = 1.0_rp
  half_step = -dtau / 2.0_rp
  full_step = -dtau
  w_outer = -dtau / 6.0_rp
  w_inner = -dtau / 3.0_rp

  if (neko_bcknd_device .eq. 1) then

    ! Stage 1:
    call device_invcol3(u1%x_d, phi%x_d, coef%B_d, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
         conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call device_col2(k1%x_d, coef%B_d, n)

    ! Stage 2:
    call device_add3s2(u1%x_d, phi%x_d, k1%x_d, one, half_step, n)
    call device_invcol2(u1%x_d, coef%B_d, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
         conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call device_col2(k2%x_d, coef%B_d, n)

    ! Stage 3:
    call device_add3s2(u1%x_d, phi%x_d, k2%x_d, one, half_step, n)
    call device_invcol2(u1%x_d, coef%B_d, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
         conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call device_col2(k3%x_d, coef%B_d, n)

    ! Stage 4:
    call device_add3s2(u1%x_d, phi%x_d, k3%x_d, one, full_step, n)
    call device_invcol2(u1%x_d, coef%B_d, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
         conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call device_col2(k4%x_d, coef%B_d, n)

    ! Final combination of the four stages
    call device_add5s4(phi%x_d, k1%x_d, k2%x_d, k3%x_d, k4%x_d, &
         w_outer, w_inner, w_inner, w_outer, n)

  else

    ! Stage 1:
    call invcol3(u1%x, phi%x, coef%B, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k1%x, u1_gl%x, conv_k1%items(1)%ptr, &
         conv_k1%items(2)%ptr, conv_k1%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call col2(k1%x, coef%B, n)

    ! Stage 2:
    call add3s2(u1%x, phi%x, k1%x, one, half_step, n)
    call invcol2(u1%x, coef%B, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k2%x, u1_gl%x, conv_k23%items(1)%ptr, &
         conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call col2(k2%x, coef%B, n)

    ! Stage 3:
    call add3s2(u1%x, phi%x, k2%x, one, half_step, n)
    call invcol2(u1%x, coef%B, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k3%x, u1_gl%x, conv_k23%items(1)%ptr, &
         conv_k23%items(2)%ptr, conv_k23%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call col2(k3%x, coef%B, n)

    ! Stage 4:
    call add3s2(u1%x, phi%x, k3%x, one, full_step, n)
    call invcol2(u1%x, coef%B, n)
    call gll_to_gl%map(u1_gl%x, u1%x, nel, xh_gl)
    call convect_scalar(k4%x, u1_gl%x, conv_k4%items(1)%ptr, &
         conv_k4%items(2)%ptr, conv_k4%items(3)%ptr, &
         xh_gll, xh_gl, coef, coef_gl, gll_to_gl)
    call col2(k4%x, coef%B, n)

    ! Final combination of the four stages
    call add5s4(phi%x, k1%x, k2%x, k3%x, k4%x, &
         w_outer, w_inner, w_inner, w_outer, n)
  end if

  call neko_scratch_registry%relinquish(ind)

end subroutine runge_kutta
1077
!> Apply cyclic boundary condition to a vector field (rank-1 arrays).
!! @details Does nothing unless `coef%cyclic` is set and `coef%cyc_msk(0)`
!! is greater than one. When the device backend is active
!! (`neko_bcknd_device == 1`) a deprecation notice is logged, the device
!! pointers associated with the host arrays are looked up and the device
!! backend is called; otherwise the CPU backend is called.
!! @param vx First component of the vector field, may be modified.
!! @param vy Second component of the vector field, may be modified.
!! @param vz Third component of the vector field, may be modified.
!! @param idir Direction selector, forwarded unchanged to the backend.
!! @param coef Coefficients providing `cyclic` and `cyc_msk`.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
  real(kind=rp), contiguous, dimension(:), intent(inout) :: vx, vy, vz
  integer, intent(in) :: idir
  type(coef_t), intent(in) :: coef
  type(c_ptr) :: vx_d, vy_d, vz_d

  ! Fortran does not guarantee short-circuit evaluation of `.and.`, so the
  ! cyclic flag is tested on its own before cyc_msk is referenced.
  ! NOTE(review): presumably cyc_msk is only set up for cyclic cases - confirm.
  if (.not. coef%cyclic) return
  if (coef%cyc_msk(0) .le. 1) return

  if (neko_bcknd_device .eq. 1) then
     call neko_log%deprecated('Operator: rotate_cyc_r1, implicit device', &
          '2.0.0', 'Please call rotate_cyc_d instead.')

     ! Resolve the device pointers associated with the host arrays.
     vx_d = device_get_ptr(vx)
     vy_d = device_get_ptr(vy)
     vz_d = device_get_ptr(vz)
     call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
  else
     call opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
  end if
end subroutine rotate_cyc_r1
1098
!> Apply cyclic boundary condition to a vector field (rank-4 arrays).
!! @details Does nothing unless `coef%cyclic` is set and `coef%cyc_msk(0)`
!! is greater than one. When the device backend is active
!! (`neko_bcknd_device == 1`) a deprecation notice is logged, the device
!! pointers associated with the host arrays are looked up and the device
!! backend is called; otherwise the CPU backend is called.
!! @param vx First component of the vector field, may be modified.
!! @param vy Second component of the vector field, may be modified.
!! @param vz Third component of the vector field, may be modified.
!! @param idir Direction selector, forwarded unchanged to the backend.
!! @param coef Coefficients providing `cyclic` and `cyc_msk`.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
  real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: vx, vy, vz
  integer, intent(in) :: idir
  type(coef_t), intent(in) :: coef
  type(c_ptr) :: vx_d, vy_d, vz_d

  ! Fortran does not guarantee short-circuit evaluation of `.and.`, so the
  ! cyclic flag is tested on its own before cyc_msk is referenced.
  ! NOTE(review): presumably cyc_msk is only set up for cyclic cases - confirm.
  if (.not. coef%cyclic) return
  if (coef%cyc_msk(0) .le. 1) return

  if (neko_bcknd_device .eq. 1) then
     call neko_log%deprecated('Operator: rotate_cyc_r4, implicit device', &
          '2.0.0', 'Please call rotate_cyc_d instead.')

     ! Resolve the device pointers associated with the host arrays.
     vx_d = device_get_ptr(vx)
     vy_d = device_get_ptr(vy)
     vz_d = device_get_ptr(vz)
     call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
  else
     call opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
  end if
end subroutine rotate_cyc_r4
1119
!> Apply cyclic boundary condition to a vector field (device pointers).
!! @details Does nothing unless `coef%cyclic` is set and `coef%cyc_msk(0)`
!! is greater than one; otherwise the device backend is called directly.
!! @param vx_d Device pointer to the first component of the vector field.
!! @param vy_d Device pointer to the second component of the vector field.
!! @param vz_d Device pointer to the third component of the vector field.
!! @param idir Direction selector, forwarded unchanged to the backend.
!! @param coef Coefficients providing `cyclic` and `cyc_msk`.
subroutine rotate_cyc_d(vx_d, vy_d, vz_d, idir, coef)
  type(c_ptr), intent(inout) :: vx_d, vy_d, vz_d
  integer, intent(in) :: idir
  type(coef_t), intent(in) :: coef

  ! Fortran does not guarantee short-circuit evaluation of `.and.`, so the
  ! cyclic flag is tested on its own before cyc_msk is referenced.
  ! NOTE(review): presumably cyc_msk is only set up for cyclic cases - confirm.
  if (.not. coef%cyclic) return
  if (coef%cyc_msk(0) .le. 1) return

  call opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
end subroutine rotate_cyc_d
1129
!> Apply cyclic boundary condition to a vector field (field_t variant).
!! @details Does nothing unless `coef%cyclic` is set and `coef%cyc_msk(0)`
!! is greater than one. When the device backend is active
!! (`neko_bcknd_device == 1`) the fields' device pointers (`x_d`) are passed
!! to the device backend; otherwise the host arrays (`x`) are passed to the
!! rank-4 CPU backend.
!! @param vx First component of the vector field, may be modified.
!! @param vy Second component of the vector field, may be modified.
!! @param vz Third component of the vector field, may be modified.
!! @param idir Direction selector, forwarded unchanged to the backend.
!! @param coef Coefficients providing `cyclic` and `cyc_msk`.
subroutine rotate_cyc_f(vx, vy, vz, idir, coef)
  type(field_t), intent(inout) :: vx, vy, vz
  integer, intent(in) :: idir
  type(coef_t), intent(in) :: coef

  ! Fortran does not guarantee short-circuit evaluation of `.and.`, so the
  ! cyclic flag is tested on its own before cyc_msk is referenced.
  ! NOTE(review): presumably cyc_msk is only set up for cyclic cases - confirm.
  if (.not. coef%cyclic) return
  if (coef%cyc_msk(0) .le. 1) return

  if (neko_bcknd_device .eq. 1) then
     call opr_device_rotate_cyc(vx%x_d, vy%x_d, vz%x_d, idir, coef)
  else
     call opr_cpu_rotate_cyc_r4(vx%x, vy%x, vz%x, idir, coef)
  end if
end subroutine rotate_cyc_f
1143
1144end module operators
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Compute CFL condition for compressible flow.
Compute CFL condition.
Definition operators.f90:97
Compute the divergence of a vector field.
Definition operators.f90:85
Compute derivative of a scalar field along a single direction.
Definition operators.f90:78
Compute the gradient of a scalar field, multiplied by the mass matrix.
Definition operators.f90:91
Apply cyclic boundary condition to a vector field.
Compute the strain rate tensor of a vector field.
Coefficients.
Definition coef.f90:34
Definition comm.F90:1
type(mpi_datatype), public mpi_real_precision
MPI type for working precision of REAL types.
Definition comm.F90:52
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:44
subroutine, public device_add2(a_d, b_d, n, strm)
Vector addition $a = a + b$.
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_add3s2(a_d, b_d, c_d, c1, c2, n, strm)
Returns $a = c_1 b + c_2 c$.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c, $a = c \cdot a$.
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector, $a = b$.
subroutine, public device_invcol3(a_d, b_d, c_d, n, strm)
Vector division $a = b / c$.
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication $a = a \cdot b$.
subroutine, public device_add5s4(a_d, b_d, c_d, d_d, e_d, c1, c2, c3, c4, n, strm)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
subroutine, public device_invcol2(a_d, b_d, n, strm)
Vector division $a = a / b$.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
subroutine, public field_rzero(a, n)
Zero a real vector.
Defines a field.
Definition field.f90:34
Routines to interpolate between different spaces.
A simulation component that computes lambda2. The values are stored in the field registry under the na...
Definition lambda2.f90:37
Logging routines.
Definition log.f90:34
type(log_t), public neko_log
Global log stream.
Definition log.f90:80
Definition math.f90:60
subroutine, public cmult(a, c, n)
Multiplication by constant c, $a = c \cdot a$.
Definition math.f90:502
subroutine, public invcol2(a, b, n)
Vector division $a = a / b$.
Definition math.f90:1028
subroutine, public cadd(a, s, n)
Add a scalar to vector, $a_i = a_i + s$.
Definition math.f90:564
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:627
subroutine, public add2(a, b, n)
Vector addition $a = a + b$.
Definition math.f90:898
subroutine, public invcol3(a, b, c, n)
Invert a vector, $a = b / c$.
Definition math.f90:783
subroutine, public add3s2(a, b, c, c1, c2, n)
Returns $a = c_1 b + c_2 c$.
Definition math.f90:1091
subroutine, public col2(a, b, n)
Vector multiplication $a = a \cdot b$.
Definition math.f90:1044
subroutine, public copy(a, b, n)
Copy a vector, $a = b$.
Definition math.f90:289
subroutine, public add5s4(a, b, c, d, e, c1, c2, c3, c4, n)
Returns $a = a + c_1 b + c_2 c + c_3 d + c_4 e$.
Definition math.f90:1126
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
Build configurations.
integer, parameter neko_bcknd_sx
integer, parameter neko_bcknd_device
logical, parameter neko_device_mpi
integer, parameter neko_bcknd_xsmm
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Transforms the convecting velocity field to the rst form of the GL space.
subroutine, public ortho(x, glb_n_points, n)
Orthogonalize with regard to the vector (1,1,1,1,1,1...,1)^T.
subroutine strain_rate_d(s11_d, s22_d, s33_d, s12_d, s13_d, s23_d, u_d, v_d, w_d, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine dudxyz_r4(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
real(kind=rp) function cfl_compressible_r4(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine, public opgrad(ux, uy, uz, u, coef, es, ee)
Compute the weak gradient of a scalar field, i.e. the gradient multiplied by the mass matrix.
subroutine div_r4(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
subroutine rotate_cyc_r1(vx, vy, vz, idir, coef)
subroutine grad_r4(ux, uy, uz, u, coef)
Compute the gradient of a scalar field.
subroutine strain_rate_r4(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Apply the convecting velocity c to the scalar field u, used in the OIFS scheme.
subroutine rotate_cyc_r4(vx, vy, vz, idir, coef)
real(kind=rp) function cfl_compressible_d(dt, max_wave_speed, xh, coef, nelv, gdim)
subroutine grad_d(ux_d, uy_d, uz_d, u_d, coef)
Compute the gradient of a scalar field.
real(kind=rp) function cfl_compressible_f(dt, max_wave_speed, xh, coef, nelv, gdim)
real(kind=rp) function cfl_f(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public conv1(du, u, vx, vy, vz, xh, coef, es, ee)
Compute the advection term.
subroutine div_d(res_d, ux_d, uy_d, uz_d, coef)
Compute the divergence of a vector field.
real(kind=rp) function cfl_r4(dt, u, v, w, xh, coef, nelv, gdim)
subroutine, public curl(w1, w2, w3, u1, u2, u3, work1, work2, coef, event)
subroutine strain_rate_f(s11, s22, s33, s12, s13, s23, u, v, w, coef)
Compute the strain rate tensor, i.e. 0.5 * (du_i/dx_j + du_j/dx_i).
subroutine, public lambda2op(lambda2, u, v, w, coef)
Compute the Lambda2 field for a given velocity field.
subroutine rotate_cyc_d(vx_d, vy_d, vz_d, idir, coef)
subroutine dudxyz_f(du, u, dr, ds, dt, coef)
Compute derivative of a scalar field along a single direction.
subroutine, public cdtp(dtx, x, dr, ds, dt, coef, es, ee)
Apply D^T to a scalar field, where D is the derivative matrix.
subroutine rotate_cyc_f(vx, vy, vz, idir, coef)
subroutine, public runge_kutta(phi, conv_k1, conv_k23, conv_k4, xh_gll, xh_gl, coef, coef_gl, gll_to_gl, tau, dtau, n, nel, n_gl)
Compute one step of Runge Kutta time interpolation for OIFS scheme.
Operators CPU backend.
Definition opr_cpu.f90:34
subroutine, public opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_cpu.f90:127
subroutine, public opr_cpu_rotate_cyc_r1(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:326
real(kind=rp) function, public opr_cpu_cfl(dt, u, v, w, xh, coef, nelv, gdim)
Definition opr_cpu.f90:185
subroutine, public opr_cpu_lambda2(lambda2, u, v, w, coef)
Definition opr_cpu.f90:245
subroutine, public opr_cpu_rotate_cyc_r4(vx, vy, vz, idir, coef)
Definition opr_cpu.f90:353
Operators accelerator backends.
subroutine, public opr_device_convect_scalar(du, u_d, cr_d, cs_d, ct_d, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
subroutine, public opr_device_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, coef)
subroutine, public opr_device_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, coef)
real(kind=rp) function, public opr_device_cfl(dt, u_d, v_d, w_d, xh, coef, nelv, gdim)
subroutine, public opr_device_conv1(du_d, u_d, vx_d, vy_d, vz_d, xh, coef, nelv, gdim)
subroutine, public opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh, event)
subroutine, public opr_device_set_convect_rst(cr_d, cs_d, ct_d, cx_d, cy_d, cz_d, xh, coef)
subroutine, public opr_device_lambda2(lambda2_d, u_d, v_d, w_d, coef)
subroutine, public opr_device_rotate_cyc(vx_d, vy_d, vz_d, idir, coef)
subroutine, public opr_device_opgrad(ux_d, uy_d, uz_d, u_d, coef)
Operators SX-Aurora backend.
Definition opr_sx.f90:2
subroutine, public opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_sx.f90:103
Operators libxsmm backend.
Definition opr_xsmm.F90:61
subroutine, public opr_xsmm_convect_scalar(du, u, cr, cs, ct, xh_gll, xh_gl, coef_gll, coef_gl, gll_to_gl)
Definition opr_xsmm.F90:370
subroutine, public opr_xsmm_conv1(du, u, vx, vy, vz, xh, coef, nelv, gdim)
Definition opr_xsmm.F90:296
subroutine, public opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
Definition opr_xsmm.F90:241
subroutine, public opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_xh)
Definition opr_xsmm.F90:417
subroutine, public opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
Definition opr_xsmm.F90:91
subroutine, public opr_xsmm_opgrad(ux, uy, uz, u, coef)
Definition opr_xsmm.F90:145
subroutine, public opr_xsmm_set_convect_rst(cr, cs, ct, cx, cy, cz, xh, coef)
Definition opr_xsmm.F90:476
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Defines a function space.
Definition space.f90:34
Defines a vector.
Definition vector.f90:34
Coefficients defined on a given (mesh, $X_h$) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
field_list_t, To be able to group fields together
Interpolation between two space::space_t.
The function space for the SEM solution fields.
Definition space.f90:63