Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
euler_res_device.F90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34 use euler_residual, only : euler_rhs_t
35 use field, only : field_t
36 use ax_product, only : ax_t
37 use coefs, only : coef_t
38 use gather_scatter, only : gs_t, gs_op_add
39 use num_types, only : rp, c_rp
41 use utils, only : neko_error
42 use, intrinsic :: iso_c_binding, only : c_ptr, c_int
43 use operators, only : div, rotate_cyc
44 use field_math, only : field_cmult
46 use field_list, only : field_list_t
47 use device_math, only : device_copy, device_rone, &
49
50 type, public, extends(euler_rhs_t) :: euler_res_device_t
51 contains
52 procedure, nopass :: step => advance_primitive_variables_device
53 procedure, nopass :: evaluate_rhs_device
54 end type euler_res_device_t
55
56#ifdef HAVE_HIP
57 interface
58 subroutine euler_res_part_visc_hip(rhs_field_d, Binv_d, field_d, &
59 effective_visc_d, n) &
60 bind(c, name = 'euler_res_part_visc_hip')
61 use, intrinsic :: iso_c_binding
62 import c_rp
63 implicit none
64 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
65 integer(c_int) :: n
66 end subroutine euler_res_part_visc_hip
67 end interface
68
69 interface
70 subroutine euler_res_part_mx_flux_hip(f_x, f_y, f_z, &
71 m_x, m_y, m_z, rho_field, p, n) &
72 bind(c, name = 'euler_res_part_mx_flux_hip')
73 use, intrinsic :: iso_c_binding
74 implicit none
75 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
76 integer(c_int) :: n
77 end subroutine euler_res_part_mx_flux_hip
78 end interface
79
80 interface
81 subroutine euler_res_part_my_flux_hip(f_x, f_y, f_z, &
82 m_x, m_y, m_z, rho_field, p, n) &
83 bind(c, name = 'euler_res_part_my_flux_hip')
84 use, intrinsic :: iso_c_binding
85 implicit none
86 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
87 integer(c_int) :: n
88 end subroutine euler_res_part_my_flux_hip
89 end interface
90
91 interface
92 subroutine euler_res_part_mz_flux_hip(f_x, f_y, f_z, &
93 m_x, m_y, m_z, rho_field, p, n) &
94 bind(c, name = 'euler_res_part_mz_flux_hip')
95 use, intrinsic :: iso_c_binding
96 implicit none
97 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
98 integer(c_int) :: n
99 end subroutine euler_res_part_mz_flux_hip
100 end interface
101
102 interface
103 subroutine euler_res_part_e_flux_hip(f_x, f_y, f_z, &
104 m_x, m_y, m_z, rho_field, p, E, n) &
105 bind(c, name = 'euler_res_part_E_flux_hip')
106 use, intrinsic :: iso_c_binding
107 implicit none
108 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
109 integer(c_int) :: n
110 end subroutine euler_res_part_e_flux_hip
111 end interface
112
113 interface
114 subroutine euler_res_part_coef_mult_hip(rhs_rho_field_d, rhs_m_x_d, &
115 rhs_m_y_d, rhs_m_z_d, &
116 rhs_E_d, mult_d, n) &
117 bind(c, name = 'euler_res_part_coef_mult_hip')
118 use, intrinsic :: iso_c_binding
119 implicit none
120 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
121 rhs_E_d, mult_d
122 integer(c_int) :: n
123 end subroutine euler_res_part_coef_mult_hip
124 end interface
125
126 interface
127 subroutine euler_res_part_rk_sum_hip(rho, m_x, m_y, m_z, E, &
128 k_rho_i, k_m_x_i, k_m_y_i, &
129 k_m_z_i, k_E_i, &
130 dt, b_i, n) &
131 bind(c, name = 'euler_res_part_rk_sum_hip')
132 use, intrinsic :: iso_c_binding
133 import c_rp
134 implicit none
135 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
136 k_rho_i, k_m_x_i, k_m_y_i, &
137 k_m_z_i, k_E_i
138 real(c_rp) :: dt, b_i
139 integer(c_int) :: n
140 end subroutine euler_res_part_rk_sum_hip
141 end interface
142#elif HAVE_CUDA
143 interface
144 subroutine euler_res_part_visc_cuda(rhs_field_d, Binv_d, field_d, &
145 effective_visc_d, n) &
146 bind(c, name = 'euler_res_part_visc_cuda')
147 use, intrinsic :: iso_c_binding
148 import c_rp
149 implicit none
150 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
151 integer(c_int) :: n
152 end subroutine euler_res_part_visc_cuda
153 end interface
154
155 interface
156 subroutine euler_res_part_mx_flux_cuda(f_x, f_y, f_z, &
157 m_x, m_y, m_z, rho_field, p, n) &
158 bind(c, name = 'euler_res_part_mx_flux_cuda')
159 use, intrinsic :: iso_c_binding
160 implicit none
161 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
162 integer(c_int) :: n
163 end subroutine euler_res_part_mx_flux_cuda
164 end interface
165
166 interface
167 subroutine euler_res_part_my_flux_cuda(f_x, f_y, f_z, &
168 m_x, m_y, m_z, rho_field, p, n) &
169 bind(c, name = 'euler_res_part_my_flux_cuda')
170 use, intrinsic :: iso_c_binding
171 implicit none
172 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
173 integer(c_int) :: n
174 end subroutine euler_res_part_my_flux_cuda
175 end interface
176
177 interface
178 subroutine euler_res_part_mz_flux_cuda(f_x, f_y, f_z, &
179 m_x, m_y, m_z, rho_field, p, n) &
180 bind(c, name = 'euler_res_part_mz_flux_cuda')
181 use, intrinsic :: iso_c_binding
182 implicit none
183 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
184 integer(c_int) :: n
185 end subroutine euler_res_part_mz_flux_cuda
186 end interface
187
188 interface
189 subroutine euler_res_part_e_flux_cuda(f_x, f_y, f_z, &
190 m_x, m_y, m_z, rho_field, p, E, n) &
191 bind(c, name = 'euler_res_part_E_flux_cuda')
192 use, intrinsic :: iso_c_binding
193 implicit none
194 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
195 integer(c_int) :: n
196 end subroutine euler_res_part_e_flux_cuda
197 end interface
198
199 interface
200 subroutine euler_res_part_coef_mult_cuda(rhs_rho_field_d, &
201 rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
202 rhs_E_d, mult_d, n) &
203 bind(c, name = 'euler_res_part_coef_mult_cuda')
204 use, intrinsic :: iso_c_binding
205 implicit none
206 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
207 rhs_E_d, mult_d
208 integer(c_int) :: n
209 end subroutine euler_res_part_coef_mult_cuda
210 end interface
211
212 interface
213 subroutine euler_res_part_rk_sum_cuda(rho, m_x, m_y, m_z, E, &
214 k_rho_i, k_m_x_i, k_m_y_i, &
215 k_m_z_i, k_E_i, &
216 dt, c, n) &
217 bind(c, name = 'euler_res_part_rk_sum_cuda')
218 use, intrinsic :: iso_c_binding
219 import c_rp
220 implicit none
221 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
222 k_rho_i, k_m_x_i, k_m_y_i, &
223 k_m_z_i, k_E_i
224 real(c_rp) :: dt, c
225 integer(c_int) :: n
226 end subroutine euler_res_part_rk_sum_cuda
227 end interface
228#elif HAVE_OPENCL
229 interface
230 subroutine euler_res_part_visc_opencl(rhs_field_d, Binv_d, field_d, &
231 effective_visc_d, n) &
232 bind(c, name = 'euler_res_part_visc_opencl')
233 use, intrinsic :: iso_c_binding
234 import c_rp
235 implicit none
236 type(c_ptr), value :: rhs_field_d, Binv_d, field_d, effective_visc_d
237 integer(c_int) :: n
238 end subroutine euler_res_part_visc_opencl
239 end interface
240
241 interface
242 subroutine euler_res_part_mx_flux_opencl(f_x, f_y, f_z, &
243 m_x, m_y, m_z, rho_field, p, n) &
244 bind(c, name = 'euler_res_part_mx_flux_opencl')
245 use, intrinsic :: iso_c_binding
246 implicit none
247 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
248 integer(c_int) :: n
249 end subroutine euler_res_part_mx_flux_opencl
250 end interface
251
252 interface
253 subroutine euler_res_part_my_flux_opencl(f_x, f_y, f_z, &
254 m_x, m_y, m_z, rho_field, p, n) &
255 bind(c, name = 'euler_res_part_my_flux_opencl')
256 use, intrinsic :: iso_c_binding
257 implicit none
258 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
259 integer(c_int) :: n
260 end subroutine euler_res_part_my_flux_opencl
261 end interface
262
263 interface
264 subroutine euler_res_part_mz_flux_opencl(f_x, f_y, f_z, &
265 m_x, m_y, m_z, rho_field, p, n) &
266 bind(c, name = 'euler_res_part_mz_flux_opencl')
267 use, intrinsic :: iso_c_binding
268 implicit none
269 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p
270 integer(c_int) :: n
271 end subroutine euler_res_part_mz_flux_opencl
272 end interface
273
274 interface
275 subroutine euler_res_part_e_flux_opencl(f_x, f_y, f_z, &
276 m_x, m_y, m_z, rho_field, p, E, n) &
277 bind(c, name = 'euler_res_part_E_flux_opencl')
278 use, intrinsic :: iso_c_binding
279 implicit none
280 type(c_ptr), value :: f_x, f_y, f_z, m_x, m_y, m_z, rho_field, p, E
281 integer(c_int) :: n
282 end subroutine euler_res_part_e_flux_opencl
283 end interface
284
285 interface
286 subroutine euler_res_part_coef_mult_opencl(rhs_rho_field_d, &
287 rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
288 rhs_E_d, mult_d, n) &
289 bind(c, name = 'euler_res_part_coef_mult_opencl')
290 use, intrinsic :: iso_c_binding
291 implicit none
292 type(c_ptr), value :: rhs_rho_field_d, rhs_m_x_d, rhs_m_y_d, rhs_m_z_d, &
293 rhs_E_d, mult_d
294 integer(c_int) :: n
296 end interface
297
298 interface
299 subroutine euler_res_part_rk_sum_opencl(rho, m_x, m_y, m_z, E, &
300 k_rho_i, k_m_x_i, k_m_y_i, &
301 k_m_z_i, k_E_i, &
302 dt, c, n) &
303 bind(c, name = 'euler_res_part_rk_sum_opencl')
304 use, intrinsic :: iso_c_binding
305 import c_rp
306 implicit none
307 type(c_ptr), value :: rho, m_x, m_y, m_z, E, &
308 k_rho_i, k_m_x_i, k_m_y_i, &
309 k_m_z_i, k_E_i
310 real(c_rp) :: dt, c
311 integer(c_int) :: n
312 end subroutine euler_res_part_rk_sum_opencl
313 end interface
314#endif
315
316contains
318 m_x, m_y, m_z, E, p, u, v, w, Ax, &
319 coef, gs, h, effective_visc, rk_scheme, dt)
320 type(field_t), intent(inout) :: rho_field, m_x, m_y, m_z, E
321 type(field_t), intent(in) :: p, u, v, w, h, effective_visc
322 class(ax_t), intent(inout) :: Ax
323 type(coef_t), intent(inout) :: coef
324 type(gs_t), intent(inout) :: gs
325 class(runge_kutta_time_scheme_t), intent(in) :: rk_scheme
326 real(kind=rp), intent(in) :: dt
327 integer :: n, s, i, j, k
328 real(kind=rp) :: t, c
329 type(field_t), pointer :: k_rho_1, k_rho_2, k_rho_3, k_rho_4, &
330 k_m_x_1, k_m_x_2, k_m_x_3, k_m_x_4, &
331 k_m_y_1, k_m_y_2, k_m_y_3, k_m_y_4, &
332 k_m_z_1, k_m_z_2, k_m_z_3, k_m_z_4, &
333 k_e_1, k_e_2, k_e_3, k_e_4, &
334 temp_rho, temp_m_x, temp_m_y, temp_m_z, temp_e
335 integer :: temp_indices(25)
336 type(field_list_t) :: k_rho, k_m_x, k_m_y, k_m_z, k_E
337
338 n = p%dof%size()
339 s = rk_scheme%order
340 call neko_scratch_registry%request_field(k_rho_1, temp_indices(1), .true.)
341 call neko_scratch_registry%request_field(k_rho_2, temp_indices(2), .true.)
342 call neko_scratch_registry%request_field(k_rho_3, temp_indices(3), .true.)
343 call neko_scratch_registry%request_field(k_rho_4, temp_indices(4), .true.)
344 call neko_scratch_registry%request_field(k_m_x_1, temp_indices(5), .true.)
345 call neko_scratch_registry%request_field(k_m_x_2, temp_indices(6), .true.)
346 call neko_scratch_registry%request_field(k_m_x_3, temp_indices(7), .true.)
347 call neko_scratch_registry%request_field(k_m_x_4, temp_indices(8), .true.)
348 call neko_scratch_registry%request_field(k_m_y_1, temp_indices(9), .true.)
349 call neko_scratch_registry%request_field(k_m_y_2, temp_indices(10), .true.)
350 call neko_scratch_registry%request_field(k_m_y_3, temp_indices(11), .true.)
351 call neko_scratch_registry%request_field(k_m_y_4, temp_indices(12), .true.)
352 call neko_scratch_registry%request_field(k_m_z_1, temp_indices(13), .true.)
353 call neko_scratch_registry%request_field(k_m_z_2, temp_indices(14), .true.)
354 call neko_scratch_registry%request_field(k_m_z_3, temp_indices(15), .true.)
355 call neko_scratch_registry%request_field(k_m_z_4, temp_indices(16), .true.)
356 call neko_scratch_registry%request_field(k_e_1, temp_indices(17), .true.)
357 call neko_scratch_registry%request_field(k_e_2, temp_indices(18), .true.)
358 call neko_scratch_registry%request_field(k_e_3, temp_indices(19), .true.)
359 call neko_scratch_registry%request_field(k_e_4, temp_indices(20), .true.)
360 call neko_scratch_registry%request_field(temp_rho, temp_indices(21), .false.)
361 call neko_scratch_registry%request_field(temp_m_x, temp_indices(22), .false.)
362 call neko_scratch_registry%request_field(temp_m_y, temp_indices(23), .false.)
363 call neko_scratch_registry%request_field(temp_m_z, temp_indices(24), .false.)
364 call neko_scratch_registry%request_field(temp_e, temp_indices(25), .false.)
365
366 call k_rho%init(4)
367 call k_rho%assign(1, k_rho_1)
368 call k_rho%assign(2, k_rho_2)
369 call k_rho%assign(3, k_rho_3)
370 call k_rho%assign(4, k_rho_4)
371 call k_m_x%init(4)
372 call k_m_x%assign(1, k_m_x_1)
373 call k_m_x%assign(2, k_m_x_2)
374 call k_m_x%assign(3, k_m_x_3)
375 call k_m_x%assign(4, k_m_x_4)
376 call k_m_y%init(4)
377 call k_m_y%assign(1, k_m_y_1)
378 call k_m_y%assign(2, k_m_y_2)
379 call k_m_y%assign(3, k_m_y_3)
380 call k_m_y%assign(4, k_m_y_4)
381 call k_m_z%init(4)
382 call k_m_z%assign(1, k_m_z_1)
383 call k_m_z%assign(2, k_m_z_2)
384 call k_m_z%assign(3, k_m_z_3)
385 call k_m_z%assign(4, k_m_z_4)
386 call k_e%init(4)
387 call k_e%assign(1, k_e_1)
388 call k_e%assign(2, k_e_2)
389 call k_e%assign(3, k_e_3)
390 call k_e%assign(4, k_e_4)
391
392 ! Runge-Kutta stages
393 do i = 1, s
394 call device_copy(temp_rho%x_d, rho_field%x_d, n)
395 call device_copy(temp_m_x%x_d, m_x%x_d, n)
396 call device_copy(temp_m_y%x_d, m_y%x_d, n)
397 call device_copy(temp_m_z%x_d, m_z%x_d, n)
398 call device_copy(temp_e%x_d, e%x_d, n)
399
400 do j = 1, i-1
401#ifdef HAVE_HIP
402 call euler_res_part_rk_sum_hip(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
403 temp_m_z%x_d, temp_e%x_d, &
404 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
405 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
406 dt, rk_scheme%coeffs_A(i, j), n)
407#elif HAVE_CUDA
408 call euler_res_part_rk_sum_cuda(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
409 temp_m_z%x_d, temp_e%x_d, &
410 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
411 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
412 dt, rk_scheme%coeffs_A(i, j), n)
413#elif HAVE_OPENCL
414 call euler_res_part_rk_sum_opencl(temp_rho%x_d, temp_m_x%x_d, temp_m_y%x_d, &
415 temp_m_z%x_d, temp_e%x_d, &
416 k_rho%items(j)%ptr%x_d, k_m_x%items(j)%ptr%x_d, k_m_y%items(j)%ptr%x_d, &
417 k_m_z%items(j)%ptr%x_d, k_e%items(j)%ptr%x_d, &
418 dt, rk_scheme%coeffs_A(i, j), n)
419#endif
420 end do
421
422 ! Compute f(U) = rhs(U) for the intermediate values
423 call evaluate_rhs_device(k_rho%items(i)%ptr, k_m_x%items(i)%ptr, &
424 k_m_y%items(i)%ptr, k_m_z%items(i)%ptr, k_e%items(i)%ptr, &
425 temp_rho, temp_m_x, temp_m_y, temp_m_z, temp_e, &
426 p, u, v, w, ax, &
427 coef, gs, h, effective_visc)
428 end do
429
430 ! Update the solution
431 do i = 1, s
432#ifdef HAVE_HIP
433 call euler_res_part_rk_sum_hip(rho_field%x_d, &
434 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
435 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
436 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
437 dt, rk_scheme%coeffs_b(i), n)
438#elif HAVE_CUDA
439 call euler_res_part_rk_sum_cuda(rho_field%x_d, &
440 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
441 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
442 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
443 dt, rk_scheme%coeffs_b(i), n)
444#elif HAVE_OPENCL
445 call euler_res_part_rk_sum_opencl(rho_field%x_d, &
446 m_x%x_d, m_y%x_d, m_z%x_d, e%x_d, &
447 k_rho%items(i)%ptr%x_d, k_m_x%items(i)%ptr%x_d, k_m_y%items(i)%ptr%x_d, &
448 k_m_z%items(i)%ptr%x_d, k_e%items(i)%ptr%x_d, &
449 dt, rk_scheme%coeffs_b(i), n)
450#endif
451 end do
452
453 call neko_scratch_registry%relinquish_field(temp_indices)
455
456 subroutine evaluate_rhs_device(rhs_rho_field, rhs_m_x, rhs_m_y, &
457 rhs_m_z, rhs_E, rho_field, &
458 m_x, m_y, m_z, E, p, u, v, w, Ax, &
459 coef, gs, h, effective_visc)
460 type(field_t), intent(inout) :: rhs_rho_field, rhs_m_x, rhs_m_y, rhs_m_z, rhs_E
461 type(field_t), intent(inout) :: rho_field, m_x, m_y, m_z, E
462 type(field_t), intent(in) :: p, u, v, w, h, effective_visc
463 class(ax_t), intent(inout) :: Ax
464 type(coef_t), intent(inout) :: coef
465 type(gs_t), intent(inout) :: gs
466 integer :: n, i
467 real(kind=rp) :: visc_coeff
468 type(field_t), pointer :: temp, f_x, f_y, f_z, &
469 visc_rho, visc_m_x, visc_m_y, visc_m_z, visc_e
470 integer :: temp_indices(8)
471
472 n = coef%dof%size()
473 call neko_scratch_registry%request_field(f_x, temp_indices(1), .false.)
474 call neko_scratch_registry%request_field(f_y, temp_indices(2), .false.)
475 call neko_scratch_registry%request_field(f_z, temp_indices(3), .false.)
476
478 call div(rhs_rho_field%x, m_x%x, m_y%x, m_z%x, coef)
479
481 ! m_x
482#ifdef HAVE_HIP
483 call euler_res_part_mx_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
484 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
485#elif HAVE_CUDA
486 call euler_res_part_mx_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
487 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
488#elif HAVE_OPENCL
489 call euler_res_part_mx_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
490 m_x%x_d, m_y%x_d, m_z%x_d, rho_field%x_d, p%x_d, n)
491#endif
492 call div(rhs_m_x%x, f_x%x, f_y%x, f_z%x, coef)
493 ! m_y
494#ifdef HAVE_HIP
495 call euler_res_part_my_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
496 m_x%x_d, m_y%x_d, m_z%x_d, &
497 rho_field%x_d, p%x_d, n)
498#elif HAVE_CUDA
499 call euler_res_part_my_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
500 m_x%x_d, m_y%x_d, m_z%x_d, &
501 rho_field%x_d, p%x_d, n)
502#elif HAVE_OPENCL
503 call euler_res_part_my_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
504 m_x%x_d, m_y%x_d, m_z%x_d, &
505 rho_field%x_d, p%x_d, n)
506#endif
507 call div(rhs_m_y%x, f_x%x, f_y%x, f_z%x, coef)
508 ! m_z
509#ifdef HAVE_HIP
510 call euler_res_part_mz_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
511 m_x%x_d, m_y%x_d, m_z%x_d, &
512 rho_field%x_d, p%x_d, n)
513#elif HAVE_CUDA
514 call euler_res_part_mz_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
515 m_x%x_d, m_y%x_d, m_z%x_d, &
516 rho_field%x_d, p%x_d, n)
517#elif HAVE_OPENCL
518 call euler_res_part_mz_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
519 m_x%x_d, m_y%x_d, m_z%x_d, &
520 rho_field%x_d, p%x_d, n)
521#endif
522 call div(rhs_m_z%x, f_x%x, f_y%x, f_z%x, coef)
523
525#ifdef HAVE_HIP
526 call euler_res_part_e_flux_hip(f_x%x_d, f_y%x_d, f_z%x_d, &
527 m_x%x_d, m_y%x_d, m_z%x_d, &
528 rho_field%x_d, p%x_d, e%x_d, n)
529#elif HAVE_CUDA
530 call euler_res_part_e_flux_cuda(f_x%x_d, f_y%x_d, f_z%x_d, &
531 m_x%x_d, m_y%x_d, m_z%x_d, &
532 rho_field%x_d, p%x_d, e%x_d, n)
533#elif HAVE_OPENCL
534 call euler_res_part_e_flux_opencl(f_x%x_d, f_y%x_d, f_z%x_d, &
535 m_x%x_d, m_y%x_d, m_z%x_d, &
536 rho_field%x_d, p%x_d, e%x_d, n)
537#endif
538 call div(rhs_e%x, f_x%x, f_y%x, f_z%x, coef)
539
540 call gs%op(rhs_rho_field, gs_op_add)
541 call rotate_cyc(rhs_m_x%x, rhs_m_y%x, rhs_m_z%x, 1, coef)
542 call gs%op(rhs_m_x, gs_op_add)
543 call gs%op(rhs_m_y, gs_op_add)
544 call gs%op(rhs_m_z, gs_op_add)
545 call rotate_cyc(rhs_m_x%x, rhs_m_y%x, rhs_m_z%x, 0, coef)
546 call gs%op(rhs_e, gs_op_add)
547
548#ifdef HAVE_HIP
549 call euler_res_part_coef_mult_hip(rhs_rho_field%x_d, rhs_m_x%x_d, &
550 rhs_m_y%x_d, rhs_m_z%x_d, &
551 rhs_e%x_d, coef%mult_d, n)
552#elif HAVE_CUDA
553 call euler_res_part_coef_mult_cuda(rhs_rho_field%x_d, rhs_m_x%x_d, &
554 rhs_m_y%x_d, rhs_m_z%x_d, &
555 rhs_e%x_d, coef%mult_d, n)
556#elif HAVE_OPENCL
557 call euler_res_part_coef_mult_opencl(rhs_rho_field%x_d, rhs_m_x%x_d, &
558 rhs_m_y%x_d, rhs_m_z%x_d, &
559 rhs_e%x_d, coef%mult_d, n)
560#endif
561
562 call neko_scratch_registry%request_field(visc_rho, temp_indices(4), .false.)
563 call neko_scratch_registry%request_field(visc_m_x, temp_indices(5), .false.)
564 call neko_scratch_registry%request_field(visc_m_y, temp_indices(6), .false.)
565 call neko_scratch_registry%request_field(visc_m_z, temp_indices(7), .false.)
566 call neko_scratch_registry%request_field(visc_e, temp_indices(8), .false.)
567
568 ! Set h1 coefficient to the effective viscosity for the Laplacian operator
569 call device_copy(coef%h1_d, effective_visc%x_d, n)
570
571 ! Calculate artificial diffusion with variable viscosity
572 call ax%compute(visc_rho%x, rho_field%x, coef, p%msh, p%Xh)
573 call ax%compute(visc_m_x%x, m_x%x, coef, p%msh, p%Xh)
574 call ax%compute(visc_m_y%x, m_y%x, coef, p%msh, p%Xh)
575 call ax%compute(visc_m_z%x, m_z%x, coef, p%msh, p%Xh)
576 call ax%compute(visc_e%x, e%x, coef, p%msh, p%Xh)
577
578 ! Reset h1 coefficient back to 1.0 for other operations
579 call device_rone(coef%h1_d, n)
580
581 call gs%op(visc_rho, gs_op_add)
582 call rotate_cyc(visc_m_x%x, visc_m_y%x, visc_m_z%x, 1, coef)
583 call gs%op(visc_m_x, gs_op_add)
584 call gs%op(visc_m_y, gs_op_add)
585 call gs%op(visc_m_z, gs_op_add)
586 call rotate_cyc(visc_m_x%x, visc_m_y%x, visc_m_z%x, 0, coef)
587 call gs%op(visc_e, gs_op_add)
588
589 ! Apply artificial viscosity - the coefficient is already in the Laplacian
590 ! rhs = -rhs - Binv * visc_lap
591 call device_col2(visc_rho%x_d, coef%Binv_d, n)
592 call device_col2(visc_m_x%x_d, coef%Binv_d, n)
593 call device_col2(visc_m_y%x_d, coef%Binv_d, n)
594 call device_col2(visc_m_z%x_d, coef%Binv_d, n)
595 call device_col2(visc_e%x_d, coef%Binv_d, n)
596
597 call device_cmult(rhs_rho_field%x_d, -1.0_rp, n)
598 call device_sub2(rhs_rho_field%x_d, visc_rho%x_d, n)
599
600 call device_cmult(rhs_m_x%x_d, -1.0_rp, n)
601 call device_sub2(rhs_m_x%x_d, visc_m_x%x_d, n)
602
603 call device_cmult(rhs_m_y%x_d, -1.0_rp, n)
604 call device_sub2(rhs_m_y%x_d, visc_m_y%x_d, n)
605
606 call device_cmult(rhs_m_z%x_d, -1.0_rp, n)
607 call device_sub2(rhs_m_z%x_d, visc_m_z%x_d, n)
608
609 call device_cmult(rhs_e%x_d, -1.0_rp, n)
610 call device_sub2(rhs_e%x_d, visc_e%x_d, n)
611
612 call neko_scratch_registry%relinquish_field(temp_indices)
613
614 end subroutine evaluate_rhs_device
615
616end module euler_res_device
void euler_res_part_coef_mult_opencl(void *rhs_rho, void *rhs_m_x, void *rhs_m_y, void *rhs_m_z, void *rhs_E, void *mult, int *n)
Definition euler_res.c:205
void euler_res_part_visc_opencl(void *rhs_u, void *Binv, void *lap_sol, void *effective_visc, int *n)
Definition euler_res.c:49
void euler_res_part_my_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:108
void euler_res_part_mx_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:76
void euler_res_part_mz_flux_opencl(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.c:140
void euler_res_part_rk_sum_opencl(void *rho, void *m_x, void *m_y, void *m_z, void *E, void *k_rho_i, void *k_m_x_i, void *k_m_y_i, void *k_m_z_i, void *k_E_i, real *dt, real *c, int *n)
Definition euler_res.c:235
void euler_res_part_mx_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:55
void euler_res_part_my_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:70
void euler_res_part_visc_cuda(void *rhs_u, void *Binv, void *lap_sol, void *effective_visc, int *n)
Definition euler_res.cu:42
void euler_res_part_rk_sum_cuda(void *rho, void *m_x, void *m_y, void *m_z, void *E, void *k_rho_i, void *k_m_x_i, void *k_m_y_i, void *k_m_z_i, void *k_E_i, real *dt, real *c, int *n)
Definition euler_res.cu:131
void euler_res_part_coef_mult_cuda(void *rhs_rho, void *rhs_m_x, void *rhs_m_y, void *rhs_m_z, void *rhs_E, void *mult, int *n)
Definition euler_res.cu:116
void euler_res_part_mz_flux_cuda(void *f_x, void *f_y, void *f_z, void *m_x, void *m_y, void *m_z, void *rho_field, void *p, int *n)
Definition euler_res.cu:85
Defines a Matrix-vector product.
Definition ax.f90:34
Coefficients.
Definition coef.f90:34
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_cmult(a_d, c, n, strm)
Multiplication by constant c .
subroutine, public device_sub2(a_d, b_d, n, strm)
Vector substraction .
subroutine, public device_copy(a_d, b_d, n, strm)
Copy a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine evaluate_rhs_device(rhs_rho_field, rhs_m_x, rhs_m_y, rhs_m_z, rhs_e, rho_field, m_x, m_y, m_z, e, p, u, v, w, ax, coef, gs, h, effective_visc)
subroutine advance_primitive_variables_device(rho_field, m_x, m_y, m_z, e, p, u, v, w, ax, coef, gs, h, effective_visc, rk_scheme, dt)
subroutine, public field_cmult(a, c, n)
Multiplication by constant c .
Defines a field.
Definition field.f90:34
Gather-scatter.
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Operators.
Definition operators.f90:34
subroutine, public div(res, ux, uy, uz, coef)
Compute the divergence of a vector field.
Defines a registry for storing and requesting temporary objects This can be used when you have a func...
type(scratch_registry_t), target, public neko_scratch_registry
Global scratch registry.
Utilities.
Definition utils.f90:35
Base type for a matrix-vector product providing .
Definition ax.f90:43
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
Abstract type to compute rhs.
Definition euler_res.f90:44
field_list_t, To be able to group fields together