Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
euler_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use euler_residual, only : euler_rhs_t
35 use field, only : field_t
36 use ax_product, only : ax_t
37 use coefs, only : coef_t
38 use gather_scatter, only : gs_t, gs_op_add
39 use num_types, only : rp, c_rp
41 use utils, only : neko_error
42 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
43 use operators, only : div, rotate_cyc
44 use field_math, only : field_cmult
46 use field_list, only : field_list_t
47 use device_math, only : device_copy, device_rone, &
49
50 type, public, extends(euler_rhs_t) :: euler_res_device_t
51 contains
52 procedure, nopass :: step => advance_primitive_variables_device
53 procedure, nopass :: evaluate_rhs_device
54 end type euler_res_device_t
55
56#ifdef HAVE_HIP
57 interface
58 subroutine euler_res_part_visc_hip(rhs_field_d, Binv_d, field_d, &
59 effective_visc_d, n) &
60 bind(c, name = 'euler_res_part_visc_hip')
61 use, intrinsic :: iso_c_binding
62 import c_rp
63 implicit none
64 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
65 integer(c_int) :: n
66 end subroutine euler_res_part_visc_hip
67 end interface
68
69 interface
70 subroutine euler_res_part_mx_flux_hip(f_x, f_y, f_z, &
71 m_x, m_y, m_z, rho_field, p, n) &
72 bind(c, name = 'euler_res_part_mx_flux_hip')
73 use, intrinsic :: iso_c_binding
74 implicit none
75 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
76 integer(c_int) :: n
77 end subroutine euler_res_part_mx_flux_hip
78 end interface
79
80 interface
81 subroutine euler_res_part_my_flux_hip(f_x, f_y, f_z, &
82 m_x, m_y, m_z, rho_field, p, n) &
83 bind(c, name = 'euler_res_part_my_flux_hip')
84 use, intrinsic :: iso_c_binding
85 implicit none
86 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
87 integer(c_int) :: n
88 end subroutine euler_res_part_my_flux_hip
89 end interface
90
91 interface
92 subroutine euler_res_part_mz_flux_hip(f_x, f_y, f_z, &
93 m_x, m_y, m_z, rho_field, p, n) &
94 bind(c, name = 'euler_res_part_mz_flux_hip')
95 use, intrinsic :: iso_c_binding
96 implicit none
97 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
98 integer(c_int) :: n
99 end subroutine euler_res_part_mz_flux_hip
100 end interface
101
102 interface
103 subroutine euler_res_part_e_flux_hip(f_x, f_y, f_z, &
104 m_x, m_y, m_z, rho_field, p, E, n) &
105 bind(c, name = 'euler_res_part_E_flux_hip')
106 use, intrinsic :: iso_c_binding
107 implicit none
108 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
109 integer(c_int) :: n
110 end subroutine euler_res_part_e_flux_hip
111 end interface
112
113 interface
114 subroutine euler_res_part_coef_mult_hip(rhs_rho_field_d, rhs_m_x_d, &
115 rhs_m_y_d, rhs_m_z_d, &
116 rhs_E_d, mult_d, n) &
117 bind(c, name = 'euler_res_part_coef_mult_hip')
118 use, intrinsic :: iso_c_binding
119 implicit none
120 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
121 rhs_E_d, mult_d
122 integer(c_int) :: n
123 end subroutine euler_res_part_coef_mult_hip
124 end interface
125
126 interface
127 subroutine euler_res_part_rk_sum_hip(rho, m_x, m_y, m_z, E, &
128 k_rho_i, k_m_x_i, k_m_y_i, &
129 k_m_z_i, k_E_i, &
130 dt, b_i, n) &
131 bind(c, name = 'euler_res_part_rk_sum_hip')
132 use, intrinsic :: iso_c_binding
133 import c_rp
134 implicit none
135 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
136 k_rho_i, k_m_x_i, k_m_y_i, &
137 k_m_z_i, k_E_i
138 real(c_rp) :: dt, b_i
139 integer(c_int) :: n
140 end subroutine euler_res_part_rk_sum_hip
141 end interface
142#elif HAVE_CUDA
143 interface
144 subroutine euler_res_part_visc_cuda(rhs_field_d, Binv_d, field_d, &
145 effective_visc_d, n) &
146 bind(c, name = 'euler_res_part_visc_cuda')
147 use, intrinsic :: iso_c_binding
148 import c_rp
149 implicit none
150 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
151 integer(c_int) :: n
152 end subroutine euler_res_part_visc_cuda
153 end interface
154
155 interface
156 subroutine euler_res_part_mx_flux_cuda(f_x, f_y, f_z, &
157 m_x, m_y, m_z, rho_field, p, n) &
158 bind(c, name = 'euler_res_part_mx_flux_cuda')
159 use, intrinsic :: iso_c_binding
160 implicit none
161 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
162 integer(c_int) :: n
163 end subroutine euler_res_part_mx_flux_cuda
164 end interface
165
166 interface
167 subroutine euler_res_part_my_flux_cuda(f_x, f_y, f_z, &
168 m_x, m_y, m_z, rho_field, p, n) &
169 bind(c, name = 'euler_res_part_my_flux_cuda')
170 use, intrinsic :: iso_c_binding
171 implicit none
172 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
173 integer(c_int) :: n
174 end subroutine euler_res_part_my_flux_cuda
175 end interface
176
177 interface
178 subroutine euler_res_part_mz_flux_cuda(f_x, f_y, f_z, &
179 m_x, m_y, m_z, rho_field, p, n) &
180 bind(c, name = 'euler_res_part_mz_flux_cuda')
181 use, intrinsic :: iso_c_binding
182 implicit none
183 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
184 integer(c_int) :: n
185 end subroutine euler_res_part_mz_flux_cuda
186 end interface
187
188 interface
189 subroutine euler_res_part_e_flux_cuda(f_x, f_y, f_z, &
190 m_x, m_y, m_z, rho_field, p, E, n) &
191 bind(c, name = 'euler_res_part_E_flux_cuda')
192 use, intrinsic :: iso_c_binding
193 implicit none
194 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
195 integer(c_int) :: n
196 end subroutine euler_res_part_e_flux_cuda
197 end interface
198
199 interface
200 subroutine euler_res_part_coef_mult_cuda(rhs_rho_field_d, &
201 rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
202 rhs_E_d, mult_d, n) &
203 bind(c, name = 'euler_res_part_coef_mult_cuda')
204 use, intrinsic :: iso_c_binding
205 implicit none
206 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
207 rhs_E_d, mult_d
208 integer(c_int) :: n
209 end subroutine euler_res_part_coef_mult_cuda
210 end interface
211
212 interface
213 subroutine euler_res_part_rk_sum_cuda(rho, m_x, m_y, m_z, E, &
214 k_rho_i, k_m_x_i, k_m_y_i, &
215 k_m_z_i, k_E_i, &
216 dt, c, n) &
217 bind(c, name = 'euler_res_part_rk_sum_cuda')
218 use, intrinsic :: iso_c_binding
219 import c_rp
220 implicit none
221 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
222 k_rho_i, k_m_x_i, k_m_y_i, &
223 k_m_z_i, k_E_i
224 real(c_rp) :: dt, c
225 integer(c_int) :: n
226 end subroutine euler_res_part_rk_sum_cuda
227 end interface
228#elif HAVE_OPENCL
229 interface
230 subroutine euler_res_part_visc_opencl(rhs_field_d, Binv_d, field_d, &
231 effective_visc_d, n) &
232 bind(c, name = 'euler_res_part_visc_opencl')
233 use, intrinsic :: iso_c_binding
234 import c_rp
235 implicit none
236 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
237 integer(c_int) :: n
238 end subroutine euler_res_part_visc_opencl
239 end interface
240
241 interface
242 subroutine euler_res_part_mx_flux_opencl(f_x, f_y, f_z, &
243 m_x, m_y, m_z, rho_field, p, n) &
244 bind(c, name = 'euler_res_part_mx_flux_opencl')
245 use, intrinsic :: iso_c_binding
246 implicit none
247 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
248 integer(c_int) :: n
249 end subroutine euler_res_part_mx_flux_opencl
250 end interface
251
252 interface
253 subroutine euler_res_part_my_flux_opencl(f_x, f_y, f_z, &
254 m_x, m_y, m_z, rho_field, p, n) &
255 bind(c, name = 'euler_res_part_my_flux_opencl')
256 use, intrinsic :: iso_c_binding
257 implicit none
258 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
259 integer(c_int) :: n
260 end subroutine euler_res_part_my_flux_opencl
261 end interface
262
263 interface
264 subroutine euler_res_part_mz_flux_opencl(f_x, f_y, f_z, &
265 m_x, m_y, m_z, rho_field, p, n) &
266 bind(c, name = 'euler_res_part_mz_flux_opencl')
267 use, intrinsic :: iso_c_binding
268 implicit none
269 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
270 integer(c_int) :: n
271 end subroutine euler_res_part_mz_flux_opencl
272 end interface
273
274 interface
275 subroutine euler_res_part_e_flux_opencl(f_x, f_y, f_z, &
276 m_x, m_y, m_z, rho_field, p, E, n) &
277 bind(c, name = 'euler_res_part_E_flux_opencl')
278 use, intrinsic :: iso_c_binding
279 implicit none
280 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
281 integer(c_int) :: n
282 end subroutine euler_res_part_e_flux_opencl
283 end interface
284
285 interface
286 subroutine euler_res_part_coef_mult_opencl(rhs_rho_field_d, &
287 rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
288 rhs_E_d, mult_d, n) &
289 bind(c, name = 'euler_res_part_coef_mult_opencl')
290 use, intrinsic :: iso_c_binding
291 implicit none
292 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
293 rhs_E_d, mult_d
294 integer(c_int) :: n
296 end interface
297
298 interface
299 subroutine euler_res_part_rk_sum_opencl(rho, m_x, m_y, m_z, E, &
300 k_rho_i, k_m_x_i, k_m_y_i, &
301 k_m_z_i, k_E_i, &
302 dt, c, n) &
303 bind(c, name = 'euler_res_part_rk_sum_opencl')
304 use, intrinsic :: iso_c_binding
305 import c_rp
306 implicit none
307 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
308 k_rho_i, k_m_x_i, k_m_y_i, &
309 k_m_z_i, k_E_i
310 real(c_rp) :: dt, c
311 integer(c_int) :: n
312 end subroutine euler_res_part_rk_sum_opencl
313 end interface
314#elif HAVE_METAL
315 interface
316 subroutine euler_res_part_visc_metal(rhs_field_d, Binv_d, field_d, &
317 effective_visc_d, n) &
318 bind(c, name = 'euler_res_part_visc_metal')
319 use, intrinsic :: iso_c_binding
320 import c_rp
321 implicit none
322 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
323 integer(c_int) :: n
324 end subroutine euler_res_part_visc_metal
325 end interface
326
327 interface
328 subroutine euler_res_part_mx_flux_metal(f_x, f_y, f_z, &
329 m_x, m_y, m_z, rho_field, p, n) &
330 bind(c, name = 'euler_res_part_mx_flux_metal')
331 use, intrinsic :: iso_c_binding
332 implicit none
333 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
334 integer(c_int) :: n
335 end subroutine euler_res_part_mx_flux_metal
336 end interface
337
338 interface
339 subroutine euler_res_part_my_flux_metal(f_x, f_y, f_z, &
340 m_x, m_y, m_z, rho_field, p, n) &
341 bind(c, name = 'euler_res_part_my_flux_metal')
342 use, intrinsic :: iso_c_binding
343 implicit none
344 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
345 integer(c_int) :: n
346 end subroutine euler_res_part_my_flux_metal
347 end interface
348
349 interface
350 subroutine euler_res_part_mz_flux_metal(f_x, f_y, f_z, &
351 m_x, m_y, m_z, rho_field, p, n) &
352 bind(c, name = 'euler_res_part_mz_flux_metal')
353 use, intrinsic :: iso_c_binding
354 implicit none
355 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
356 integer(c_int) :: n
357 end subroutine euler_res_part_mz_flux_metal
358 end interface
359
360 interface
361 subroutine euler_res_part_e_flux_metal(f_x, f_y, f_z, &
362 m_x, m_y, m_z, rho_field, p, E, n) &
363 bind(c, name = 'euler_res_part_E_flux_metal')
364 use, intrinsic :: iso_c_binding
365 implicit none
366 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
367 integer(c_int) :: n
368 end subroutine euler_res_part_e_flux_metal
369 end interface
370
371 interface
372 subroutine euler_res_part_coef_mult_metal(rhs_rho_field_d, &
373 rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
374 rhs_E_d, mult_d, n) &
375 bind(c, name = 'euler_res_part_coef_mult_metal')
376 use, intrinsic :: iso_c_binding
377 implicit none
378 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
379 rhs_E_d, mult_d
380 integer(c_int) :: n
381 end subroutine euler_res_part_coef_mult_metal
382 end interface
383
384 interface
385 subroutine euler_res_part_rk_sum_metal(rho, m_x, m_y, m_z, E, &
386 k_rho_i, k_m_x_i, k_m_y_i, &
387 k_m_z_i, k_E_i, &
388 dt, c, n) &
389 bind(c, name = 'euler_res_part_rk_sum_metal')
390 use, intrinsic :: iso_c_binding
391 import c_rp
392 implicit none
393 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
394 k_rho_i, k_m_x_i, k_m_y_i, &
395 k_m_z_i, k_E_i
396 real(c_rp) :: dt, c
397 integer(c_int) :: n
398 end subroutine euler_res_part_rk_sum_metal
399 end interface
400#endif
401
402contains
404 m_x, m_y, m_z, E, p, u, v, w, Ax, &
405 coef, gs, h, effective_visc, rk_scheme, dt)
406 type(field_t), intent(inout) :: rho_field, m_x, m_y, m_z, E
407 type(field_t), intent(in) :: p, u, v, w, h, effective_visc
408 class(ax_t), intent(inout) :: Ax
409 type(coef_t), intent(inout) :: coef
410 type(gs_t), intent(inout) :: gs
411 class(runge_kutta_time_scheme_t), intent(in) :: rk_scheme
412 real(kind=rp), intent(in) :: dt
413 integer :: n, s, i, j, k
414 real(kind=rp) :: t, c
415 type(field_t), pointer :: k_rho_1, k_rho_2, k_rho_3, k_rho_4, &
416 k_m_x_1, k_m_x_2, k_m_x_3, k_m_x_4, &
417 k_m_y_1, k_m_y_2, k_m_y_3, k_m_y_4, &
418 k_m_z_1, k_m_z_2, k_m_z_3, k_m_z_4, &
419 k_e_1, k_e_2, k_e_3, k_e_4, &
420 temp_rho, temp_m_x, temp_m_y, temp_m_z, temp_e
421 integer :: temp_indices(25)
422 type(field_list_t) :: k_rho, k_m_x, k_m_y, k_m_z, k_E
423
424 n = p%dof%size()
425 s = rk_scheme%order
426 call neko_scratch_registry%request_field(k_rho_1, temp_indices(1), .true.)
427 call neko_scratch_registry%request_field(k_rho_2, temp_indices(2), .true.)
428 call neko_scratch_registry%request_field(k_rho_3, temp_indices(3), .true.)
429 call neko_scratch_registry%request_field(k_rho_4, temp_indices(4), .true.)
430 call neko_scratch_registry%request_field(k_m_x_1, temp_indices(5), .true.)
431 call neko_scratch_registry%request_field(k_m_x_2, temp_indices(6), .true.)
432 call neko_scratch_registry%request_field(k_m_x_3, temp_indices(7), .true.)
433 call neko_scratch_registry%request_field(k_m_x_4, temp_indices(8), .true.)
434 call neko_scratch_registry%request_field(k_m_y_1, temp_indices(9), .true.)
435 call neko_scratch_registry%request_field(k_m_y_2, temp_indices(10), .true.)
436 call neko_scratch_registry%request_field(k_m_y_3, temp_indices(11), .true.)
437 call neko_scratch_registry%request_field(k_m_y_4, temp_indices(12), .true.)
438 call neko_scratch_registry%request_field(k_m_z_1, temp_indices(13), .true.)
439 call neko_scratch_registry%request_field(k_m_z_2, temp_indices(14), .true.)
440 call neko_scratch_registry%request_field(k_m_z_3, temp_indices(15), .true.)
441 call neko_scratch_registry%request_field(k_m_z_4, temp_indices(16), .true.)
442 call neko_scratch_registry%request_field(k_e_1, temp_indices(17), .true.)
443 call neko_scratch_registry%request_field(k_e_2, temp_indices(18), .true.)
444 call neko_scratch_registry%request_field(k_e_3, temp_indices(19), .true.)
445 call neko_scratch_registry%request_field(k_e_4, temp_indices(20), .true.)
446 call neko_scratch_registry%request_field(temp_rho, temp_indices(21), .false.)
447 call neko_scratch_registry%request_field(temp_m_x, temp_indices(22), .false.)
448 call neko_scratch_registry%request_field(temp_m_y, temp_indices(23), .false.)
449 call neko_scratch_registry%request_field(temp_m_z, temp_indices(24), .false.)
450 call neko_scratch_registry%request_field(temp_e, temp_indices(25), .false.)
451
452 call k_rho%init(4)
453 call k_rho%assign(1, k_rho_1)
454 call k_rho%assign(2, k_rho_2)
455 call k_rho%assign(3, k_rho_3)
456 call k_rho%assign(4, k_rho_4)
457 call k_m_x%init(4)
458 call k_m_x%assign(1, k_m_x_1)
459 call k_m_x%assign(2, k_m_x_2)
460 call k_m_x%assign(3, k_m_x_3)
461 call k_m_x%assign(4, k_m_x_4)
462 call k_m_y%init(4)
463 call k_m_y%assign(1, k_m_y_1)
464 call k_m_y%assign(2, k_m_y_2)
465 call k_m_y%assign(3, k_m_y_3)
466 call k_m_y%assign(4, k_m_y_4)
467 call k_m_z%init(4)
468 call k_m_z%assign(1, k_m_z_1)
469 call k_m_z%assign(2, k_m_z_2)
470 call k_m_z%assign(3, k_m_z_3)
471 call k_m_z%assign(4, k_m_z_4)
472 call k_e%init(4)
473 call k_e%assign(1, k_e_1)
474 call k_e%assign(2, k_e_2)
475 call k_e%assign(3, k_e_3)
476 call k_e%assign(4, k_e_4)
477
478 ! Runge-Kutta stages
479 do i = 1, s
480 call device_copy(temp_rho%x_d, rho_field%x_d, n)
481 call device_copy(temp_m_x%x_d, m_x%x_d, n)
482 call device_copy(temp_m_y%x_d, m_y%x_d, n)
483 call device_copy(temp_m_z%x_d, m_z%x_d, n)
484 call device_copy(temp_e%x_d, e%x_d, n)
485
486 do j = 1, i-1
487#ifdef HAVE_HIP
488 call euler_res_part_rk_sum_hip(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
489 temp_m_z%x_d, temp_e%x_d, &
490 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
491 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
492 dt, rk_scheme%coeffs_A(i, j), n)
493#elif HAVE_CUDA
494 call euler_res_part_rk_sum_cuda(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
495 temp_m_z%x_d, temp_e%x_d, &
496 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
497 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
498 dt, rk_scheme%coeffs_A(i, j), n)
499#elif HAVE_OPENCL
500 call euler_res_part_rk_sum_opencl(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
501 temp_m_z%x_d, temp_e%x_d, &
502 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
503 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
504 dt, rk_scheme%coeffs_A(i, j), n)
505#elif HAVE_METAL
506 call euler_res_part_rk_sum_metal(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
507 temp_m_z%x_d, temp_e%x_d, &
508 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
509 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
510 dt, rk_scheme%coeffs_A(i, j), n)
511#endif
512 end do
513
514 ! Compute f(U) = rhs(U) for the intermediate values
515 call evaluate_rhs_device(k_rho%items(i)%ptr, k_m_x%items(i)%ptr, &
516 k_m_y%items(i)%ptr, k_m_z%items(i)%ptr, k_e%items(i)%ptr, &
517 temp_rho, temp_m_x, temp_m_y, temp_m_z, temp_e, &
518 p, u, v, w, ax, &
519 coef, gs, h, effective_visc)
520 end do
521
522 ! Update the solution
523 do i = 1, s
524#ifdef HAVE_HIP
525 call euler_res_part_rk_sum_hip(rho_field%x_d, &
526 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
527 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
528 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
529 dt, rk_scheme%coeffs_b(i), n)
530#elif HAVE_CUDA
531 call euler_res_part_rk_sum_cuda(rho_field%x_d, &
532 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
533 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
534 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
535 dt, rk_scheme%coeffs_b(i), n)
536#elif HAVE_OPENCL
537 call euler_res_part_rk_sum_opencl(rho_field%x_d, &
538 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
539 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
540 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
541 dt, rk_scheme%coeffs_b(i), n)
542#elif HAVE_METAL
543 call euler_res_part_rk_sum_metal(rho_field%x_d, &
544 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
545 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
546 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
547 dt, rk_scheme%coeffs_b(i), n)
548#endif
549 end do
550
551 call neko_scratch_registry%relinquish_field(temp_indices)
553
554 subroutine evaluate_rhs_device(rhs_rho_field, rhs_m_x, rhs_m_y, &
555 rhs_m_z, rhs_E, rho_field, &
556 m_x, m_y, m_z, E, p, u, v, w, Ax, &
557 coef, gs, h, effective_visc)
558 type(field_t), intent(inout) :: rhs_rho_field, rhs_m_x, rhs_m_y, rhs_m_z, rhs_E
559 type(field_t), intent(inout) :: rho_field, m_x, m_y, m_z, E
560 type(field_t), intent(in) :: p, u, v, w, h, effective_visc
561 class(ax_t), intent(inout) :: Ax
562 type(coef_t), intent(inout) :: coef
563 type(gs_t), intent(inout) :: gs
564 integer :: n, i
565 real(kind=rp) :: visc_coeff
566 type(field_t), pointer :: temp, f_x, f_y, f_z, &
567 visc_rho, visc_m_x, visc_m_y, visc_m_z, visc_e
568 integer :: temp_indices(8)
569
570 n = coef%dof%size()
571 call neko_scratch_registry%request_field(f_x, temp_indices(1), .false.)
572 call neko_scratch_registry%request_field(f_y, temp_indices(2), .false.)
573 call neko_scratch_registry%request_field(f_z, temp_indices(3), .false.)
574
576 call div(rhs_rho_field%x, m_x%x, m_y%x, m_z%x, coef)
577
579 ! m_x
580#ifdef HAVE_HIP
581 call euler_res_part_mx_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
582 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
583#elif HAVE_CUDA
584 call euler_res_part_mx_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
585 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
586#elif HAVE_OPENCL
587 call euler_res_part_mx_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
588 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
589#elif HAVE_METAL
590 call euler_res_part_mx_flux_metal(f_x%x_d, f_y%x_d, f_z%x_d, &
591 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
592#endif
593 call div(rhs_m_x%x, f_x%x, f_y%x, f_z%x, coef)
594 ! m_y
595#ifdef HAVE_HIP
596 call euler_res_part_my_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
597 m_x%x_d, m_y%x_d, m_z%x_d, &
598 rho_field%x_d, p%x_d, n)
599#elif HAVE_CUDA
600 call euler_res_part_my_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
601 m_x%x_d, m_y%x_d, m_z%x_d, &
602 rho_field%x_d, p%x_d, n)
603#elif HAVE_OPENCL
604 call euler_res_part_my_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
605 m_x%x_d, m_y%x_d, m_z%x_d, &
606 rho_field%x_d, p%x_d, n)
607#elif HAVE_METAL
608 call euler_res_part_my_flux_metal(f_x%x_d, f_y%x_d, f_z%x_d, &
609 m_x%x_d, m_y%x_d, m_z%x_d, &
610 rho_field%x_d, p%x_d, n)
611#endif
612 call div(rhs_m_y%x, f_x%x, f_y%x, f_z%x, coef)
613 ! m_z
614#ifdef HAVE_HIP
615 call euler_res_part_mz_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
616 m_x%x_d, m_y%x_d, m_z%x_d, &
617 rho_field%x_d, p%x_d, n)
618#elif HAVE_CUDA
619 call euler_res_part_mz_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
620 m_x%x_d, m_y%x_d, m_z%x_d, &
621 rho_field%x_d, p%x_d, n)
622#elif HAVE_OPENCL
623 call euler_res_part_mz_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
624 m_x%x_d, m_y%x_d, m_z%x_d, &
625 rho_field%x_d, p%x_d, n)
626#elif HAVE_METAL
627 call euler_res_part_mz_flux_metal(f_x%x_d, f_y%x_d, f_z%x_d, &
628 m_x%x_d, m_y%x_d, m_z%x_d, &
629 rho_field%x_d, p%x_d, n)
630#endif
631 call div(rhs_m_z%x, f_x%x, f_y%x, f_z%x, coef)
632
634#ifdef HAVE_HIP
635 call euler_res_part_e_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
636 m_x%x_d, m_y%x_d, m_z%x_d, &
637 rho_field%x_d, p%x_d, e%x_d, n)
638#elif HAVE_CUDA
639 call euler_res_part_e_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
640 m_x%x_d, m_y%x_d, m_z%x_d, &
641 rho_field%x_d, p%x_d, e%x_d, n)
642#elif HAVE_OPENCL
643 call euler_res_part_e_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
644 m_x%x_d, m_y%x_d, m_z%x_d, &
645 rho_field%x_d, p%x_d, e%x_d, n)
646#elif HAVE_METAL
647 call euler_res_part_e_flux_metal(f_x%x_d, f_y%x_d, f_z%x_d, &
648 m_x%x_d, m_y%x_d, m_z%x_d, &
649 rho_field%x_d, p%x_d, e%x_d, n)
650#endif
651 call div(rhs_e%x_d, f_x%x_d, f_y%x_d, f_z%x_d, coef)
652
653 call gs%op(rhs_rho_field, gs_op_add)
654 call rotate_cyc(rhs_m_x%x_d, rhs_m_y%x_d, rhs_m_z%x_d, 1, coef)
655 call gs%op(rhs_m_x, gs_op_add)
656 call gs%op(rhs_m_y, gs_op_add)
657 call gs%op(rhs_m_z, gs_op_add)
658 call rotate_cyc(rhs_m_x%x_d, rhs_m_y%x_d, rhs_m_z%x_d, 0, coef)
659 call gs%op(rhs_e, gs_op_add)
660
661#ifdef HAVE_HIP
662 call euler_res_part_coef_mult_hip(rhs_rho_field%x_d, rhs_m_x%x_d, &
663 rhs_m_y%x_d, rhs_m_z%x_d, &
664 rhs_e%x_d, coef%mult_d, n)
665#elif HAVE_CUDA
666 call euler_res_part_coef_mult_cuda(rhs_rho_field%x_d, rhs_m_x%x_d, &
667 rhs_m_y%x_d, rhs_m_z%x_d, &
668 rhs_e%x_d, coef%mult_d, n)
669#elif HAVE_OPENCL
670 call euler_res_part_coef_mult_opencl(rhs_rho_field%x_d, rhs_m_x%x_d, &
671 rhs_m_y%x_d, rhs_m_z%x_d, &
672 rhs_e%x_d, coef%mult_d, n)
673#elif HAVE_METAL
674 call euler_res_part_coef_mult_metal(rhs_rho_field%x_d, rhs_m_x%x_d, &
675 rhs_m_y%x_d, rhs_m_z%x_d, &
676 rhs_e%x_d, coef%mult_d, n)
677#endif
678
679 call neko_scratch_registry%request_field(visc_rho, temp_indices(4), .false.)
680 call neko_scratch_registry%request_field(visc_m_x, temp_indices(5), .false.)
681 call neko_scratch_registry%request_field(visc_m_y, temp_indices(6), .false.)
682 call neko_scratch_registry%request_field(visc_m_z, temp_indices(7), .false.)
683 call neko_scratch_registry%request_field(visc_e, temp_indices(8), .false.)
684
685 ! Set h1 coefficient to the effective viscosity for the Laplacian operator
686 call device_copy(coef%h1_d, effective_visc%x_d, n)
687
688 ! Calculate artificial diffusion with variable viscosity
689 call ax%compute(visc_rho%x, rho_field%x, coef, p%msh, p%Xh)
690 call ax%compute(visc_m_x%x, m_x%x, coef, p%msh, p%Xh)
691 call ax%compute(visc_m_y%x, m_y%x, coef, p%msh, p%Xh)
692 call ax%compute(visc_m_z%x, m_z%x, coef, p%msh, p%Xh)
693 call ax%compute(visc_e%x, e%x, coef, p%msh, p%Xh)
694
695 ! Reset h1 coefficient back to 1.0 for other operations
696 call device_rone(coef%h1_d, n)
697
698 call gs%op(visc_rho, gs_op_add)
699 call rotate_cyc(visc_m_x%x_d, visc_m_y%x_d, visc_m_z%x_d, 1, coef)
700 call gs%op(visc_m_x, gs_op_add)
701 call gs%op(visc_m_y, gs_op_add)
702 call gs%op(visc_m_z, gs_op_add)
703 call rotate_cyc(visc_m_x%x_d, visc_m_y%x_d, visc_m_z%x_d, 0, coef)
704 call gs%op(visc_e, gs_op_add)
705
706 ! Apply artificial viscosity - the coefficient is already in the Laplacian
707 ! rhs = -rhs - Binv * visc_lap
708 call device_col2(visc_rho%x_d, coef%Binv_d, n)
709 call device_col2(visc_m_x%x_d, coef%Binv_d, n)
710 call device_col2(visc_m_y%x_d, coef%Binv_d, n)
711 call device_col2(visc_m_z%x_d, coef%Binv_d, n)
712 call device_col2(visc_e%x_d, coef%Binv_d, n)
713
714 call device_cmult(rhs_rho_field%x_d, -1.0_rp, n)
715 call device_sub2(rhs_rho_field%x_d, visc_rho%x_d, n)
716
717 call device_cmult(rhs_m_x%x_d, -1.0_rp, n)
718 call device_sub2(rhs_m_x%x_d, visc_m_x%x_d, n)
719
720 call device_cmult(rhs_m_y%x_d, -1.0_rp, n)
721 call device_sub2(rhs_m_y%x_d, visc_m_y%x_d, n)
722
723 call device_cmult(rhs_m_z%x_d, -1.0_rp, n)
724 call device_sub2(rhs_m_z%x_d, visc_m_z%x_d, n)
725
726 call device_cmult(rhs_e%x_d, -1.0_rp, n)
727 call device_sub2(rhs_e%x_d, visc_e%x_d, n)
728
729 call neko_scratch_registry%relinquish_field(temp_indices)
730
731 end subroutine evaluate_rhs_device
732
733end module euler_res_device
void euler_res_part_coef_mult_opencl(void *rhs_rho, void *rhs_m_x, void *rhs_m_y, void *rhs_m_z, void *rhs_E, void *mult, int *n)
Definition euler_res.c:205
void euler_res_part_visc_opencl(void *rhs_u, void *Binv, void *lap_sol, void *effective_visc, int *n)
Definition euler_res.c:49
void euler_res_part_my_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:108
void euler_res_part_mx_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:76
void euler_res_part_mz_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:140
void euler_res_part_rk_sum_opencl(void *rho, void *m_x, void *m_y, void *m_z, void *E, void *k_rho_i, void *k_m_x_i, void *k_m_y_i, void *k_m_z_i, void *k_E_i, real *dt, real *c, int *n)
Definition euler_res.c:235
void euler_res_part_mx_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:55
void euler_res_part_my_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:70
void euler_res_part_visc_cuda(void *rhs_u, void *Binv, void *lap_sol, void *effective_visc, int *n)
Definition euler_res.cu:42
void euler_res_part_rk_sum_cuda(void *rho, void *m_x, void *m_y, void *m_z, void *E, void *k_rho_i, void *k_m_x_i, void *k_m_y_i, void *k_m_z_i, void *k_E_i, real *dt, real *c, int *n)
Definition euler_res.cu:131
void euler_res_part_coef_mult_cuda(void *rhs_rho, void *rhs_m_x, void *rhs_m_y, void *rhs_m_z, void *rhs_E, void *mult, int *n)
Definition euler_res.cu:116
void euler_res_part_mz_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:85
Compute the divergence of a vector field.
Definition operators.f90:85
Apply cyclic boundary condition to a vector field.
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine evaluate_rhs_device(rhs_rho_field, rhs_m_x, rhs_m_y, rhs_m_z, rhs_e, rho_field, m_x, m_y, m_z, e, p, u, v, w, ax, coef, gs, h, effective_visc)
subroutine advance_primitive_variables_device(rho_field, m_x, m_y, m_z, e, p, u, v, w, ax, coef, gs, h, effective_visc, rk_scheme, dt)
subroutine, public field_cmult(a, c, n)
Multiplication by constant c .
Defines a field.
Definition field.f90:34
Gather-scatter.
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Utilities.
Definition utils.f90:35
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Abstract type to compute rhs.
Definition euler_res.f90:44
field_list_t, To be able to group fields together