Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device_tree_amg_smoother.F90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only : rp, c_rp
36 use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
37 use device, only : glb_cmd_queue
38 use utils, only : neko_error
39 implicit none
40 private
41
42#ifdef HAVE_HIP
interface
   !> Explicit interface to the C-bound routine `hip_amg_cheby_solve_part1`.
   !!
   !! The device pointers and the stream handle are passed by value;
   !! the scalars `inv_thet`, `n` and `zero_initial` carry no `value`
   !! attribute and are therefore passed by reference.
   !! NOTE(review): the HIP C prototype is not visible here; presumably it
   !! mirrors the CUDA one (`real *`, `int *`, `bool *`) - confirm.
   !!
   !! `implicit none` is stated explicitly because an interface body does
   !! not inherit the host's implicit rules; without it an undeclared
   !! dummy (as `n` previously was) is silently given an implicit type.
   subroutine hip_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
        inv_thet, n, zero_initial, strm) &
        bind(c, name='hip_amg_cheby_solve_part1')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
     import c_rp
     implicit none
     type(c_ptr), value :: r_d, f_d, w_d, x_d, d_d, strm
     real(c_rp) :: inv_thet
     integer(c_int) :: n
     logical(c_bool) :: zero_initial
   end subroutine hip_amg_cheby_solve_part1
end interface
54
interface
   !> Explicit interface to the C-bound routine `hip_amg_cheby_solve_part2`.
   !!
   !! Device pointers and the stream handle are passed by value; the
   !! scalars `tmp1`, `tmp2` and `n` are passed by reference (no `value`
   !! attribute).
   subroutine hip_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, &
        tmp1, tmp2, n, strm) &
        bind(c, name='hip_amg_cheby_solve_part2')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     import :: c_rp
     ! Handles passed by value
     type(c_ptr), value :: r_d
     type(c_ptr), value :: w_d
     type(c_ptr), value :: d_d
     type(c_ptr), value :: x_d
     type(c_ptr), value :: strm
     ! Scalars passed by reference
     real(c_rp) :: tmp1
     real(c_rp) :: tmp2
     integer(c_int) :: n
   end subroutine hip_amg_cheby_solve_part2
end interface
65#elif HAVE_CUDA
interface
   !> Explicit interface to the C-bound routine `cuda_amg_cheby_solve_part1`.
   !!
   !! The device pointers and the stream handle are passed by value;
   !! the scalars `inv_thet`, `n` and `zero_initial` carry no `value`
   !! attribute and are therefore passed by reference, matching pointer
   !! parameters on the C side.
   !!
   !! `implicit none` is stated explicitly because an interface body does
   !! not inherit the host's implicit rules; without it an undeclared
   !! dummy (as `n` previously was) is silently given an implicit type.
   subroutine cuda_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
        inv_thet, n, zero_initial, strm) &
        bind(c, name='cuda_amg_cheby_solve_part1')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
     import c_rp
     implicit none
     type(c_ptr), value :: r_d, f_d, w_d, x_d, d_d, strm
     real(c_rp) :: inv_thet
     integer(c_int) :: n
     logical(c_bool) :: zero_initial
   end subroutine cuda_amg_cheby_solve_part1
end interface
77
interface
   !> Explicit interface to the C-bound routine `cuda_amg_cheby_solve_part2`.
   !!
   !! Device pointers and the stream handle are passed by value; the
   !! scalars `tmp1`, `tmp2` and `n` are passed by reference (no `value`
   !! attribute).
   subroutine cuda_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, &
        tmp1, tmp2, n, strm) &
        bind(c, name='cuda_amg_cheby_solve_part2')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int
     import :: c_rp
     ! Handles passed by value
     type(c_ptr), value :: r_d
     type(c_ptr), value :: w_d
     type(c_ptr), value :: d_d
     type(c_ptr), value :: x_d
     type(c_ptr), value :: strm
     ! Scalars passed by reference
     real(c_rp) :: tmp1
     real(c_rp) :: tmp2
     integer(c_int) :: n
   end subroutine cuda_amg_cheby_solve_part2
end interface
88#elif HAVE_OPENCL
89#endif
90
92
93contains
94
!> Dispatch part 1 of the AMG Chebyshev solve to the compiled device backend.
!!
!! Forwards all arguments, together with the global command queue
!! `glb_cmd_queue`, to `hip_amg_cheby_solve_part1` when `HAVE_HIP` is set or
!! to `cuda_amg_cheby_solve_part1` when `HAVE_CUDA` is set. In any other
!! configuration `neko_error` is called.
!!
!! @param r_d, f_d, w_d, x_d, d_d Handles handed unchanged to the backend.
!! @param inv_thet Real scalar handed to the backend.
!! @param n Integer size argument handed to the backend.
!! @param zero_initial Flag, converted to a C-interoperable logical first.
subroutine amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
     inv_thet, n, zero_initial)
  type(c_ptr), intent(inout) :: r_d
  type(c_ptr), intent(inout) :: f_d
  type(c_ptr), intent(inout) :: w_d
  type(c_ptr), intent(inout) :: x_d
  type(c_ptr), intent(inout) :: d_d
  real(kind=rp), intent(in) :: inv_thet
  integer, intent(in) :: n
  logical, intent(in) :: zero_initial
  ! C-interoperable copy of zero_initial expected by the bind(c) interfaces
  logical(c_bool) :: zero_initial_c

  zero_initial_c = logical(zero_initial, kind=c_bool)

#ifdef HAVE_HIP
  call hip_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, &
       zero_initial_c, glb_cmd_queue)
#elif HAVE_CUDA
  call cuda_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, &
       zero_initial_c, glb_cmd_queue)
#else
  call neko_error('No device backend configured')
#endif
end subroutine amg_device_cheby_solve_part1
115
!> Dispatch part 2 of the AMG Chebyshev solve to the compiled device backend.
!!
!! Forwards all arguments, together with the global command queue
!! `glb_cmd_queue`, to `hip_amg_cheby_solve_part2` when `HAVE_HIP` is set or
!! to `cuda_amg_cheby_solve_part2` when `HAVE_CUDA` is set. In any other
!! configuration `neko_error` is called.
!!
!! @param r_d, w_d, d_d, x_d Handles handed unchanged to the backend.
!! @param tmp1, tmp2 Real scalars handed to the backend.
!! @param n Integer size argument handed to the backend.
subroutine amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
  type(c_ptr), intent(inout) :: r_d
  type(c_ptr), intent(inout) :: w_d
  type(c_ptr), intent(inout) :: d_d
  type(c_ptr), intent(inout) :: x_d
  real(kind=rp), intent(in) :: tmp1
  real(kind=rp), intent(in) :: tmp2
  integer, intent(in) :: n

#ifdef HAVE_HIP
  call hip_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n, &
       glb_cmd_queue)
#elif HAVE_CUDA
  call cuda_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n, &
       glb_cmd_queue)
#else
  call neko_error('No device backend configured')
#endif
end subroutine amg_device_cheby_solve_part2
130
void cuda_amg_cheby_solve_part2(void *r, void *w, void *d, void *x, real *tmp1, real *tmp2, int *n, cudaStream_t strm)
Definition amg_cheby.cu:100
void cuda_amg_cheby_solve_part1(void *r, void *f, void *w, void *x, void *d, real *inv_thet, int *n, bool *zero_initial, cudaStream_t strm)
Definition amg_cheby.cu:85
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35