Neko 1.99.4
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
device_tree_amg_smoother.F90
Go to the documentation of this file.
1! Copyright (c) 2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use num_types, only : rp, c_rp
36 use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
37 use device, only : glb_cmd_queue
38 use utils, only : neko_error
39 implicit none
40 private
41
42#ifdef HAVE_HIP
interface
   !> Interface to the C routine `hip_amg_cheby_solve_part1`.
   !! The device pointers and the stream handle are passed by value;
   !! `inv_thet`, `n` and `zero_initial` carry no `value` attribute and
   !! are therefore passed by reference.
   subroutine hip_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
        inv_thet, n, zero_initial, strm) &
        bind(c, name='hip_amg_cheby_solve_part1')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
     import c_rp
     type(c_ptr), value :: r_d, f_d, w_d, x_d, d_d, strm
     real(c_rp) :: inv_thet
     ! Declared explicitly: an interface body does not inherit the host's
     ! `implicit none`, so without this line `n` would be implicitly typed
     ! instead of being the interoperable C `int`.
     integer(c_int) :: n
     logical(c_bool) :: zero_initial
   end subroutine hip_amg_cheby_solve_part1
end interface
54
interface
   !> Interface to the C routine `hip_amg_cheby_solve_part2`.
   !! Pointer-like handles are passed by value, the scalars `tmp1`,
   !! `tmp2` and `n` by reference.
   subroutine hip_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, &
        n, strm) &
        bind(c, name='hip_amg_cheby_solve_part2')
     use, intrinsic :: iso_c_binding, only : c_int, c_ptr
     import :: c_rp
     ! Scalars (by reference)
     integer(c_int) :: n
     real(c_rp) :: tmp1
     real(c_rp) :: tmp2
     ! Device pointers and stream handle (by value)
     type(c_ptr), value :: r_d, w_d, d_d, x_d
     type(c_ptr), value :: strm
   end subroutine hip_amg_cheby_solve_part2
end interface
65#elif HAVE_CUDA
interface
   !> Interface to the C routine `cuda_amg_cheby_solve_part1`.
   !! The device pointers and the stream handle are passed by value;
   !! `inv_thet`, `n` and `zero_initial` carry no `value` attribute and
   !! are therefore passed by reference.
   subroutine cuda_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
        inv_thet, n, zero_initial, strm) &
        bind(c, name='cuda_amg_cheby_solve_part1')
     use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_bool
     import c_rp
     type(c_ptr), value :: r_d, f_d, w_d, x_d, d_d, strm
     real(c_rp) :: inv_thet
     ! Declared explicitly: an interface body does not inherit the host's
     ! `implicit none`, so without this line `n` would be implicitly typed
     ! instead of being the interoperable C `int`.
     integer(c_int) :: n
     logical(c_bool) :: zero_initial
   end subroutine cuda_amg_cheby_solve_part1
end interface
77
interface
   !> Interface to the C routine `cuda_amg_cheby_solve_part2`.
   !! Pointer-like handles are passed by value, the scalars `tmp1`,
   !! `tmp2` and `n` by reference.
   subroutine cuda_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, &
        n, strm) &
        bind(c, name='cuda_amg_cheby_solve_part2')
     use, intrinsic :: iso_c_binding, only : c_int, c_ptr
     import :: c_rp
     ! Scalars (by reference)
     integer(c_int) :: n
     real(c_rp) :: tmp1
     real(c_rp) :: tmp2
     ! Device pointers and stream handle (by value)
     type(c_ptr), value :: r_d, w_d, d_d, x_d
     type(c_ptr), value :: strm
   end subroutine cuda_amg_cheby_solve_part2
end interface
88#elif HAVE_METAL
interface
   !> Interface to the C routine `metal_amg_cheby_solve_part1`.
   !! Pointer-like handles are passed by value, the scalars `inv_thet`,
   !! `n` and `zero_initial` by reference.
   subroutine metal_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, &
        n, zero_initial, strm) &
        bind(c, name='metal_amg_cheby_solve_part1')
     use, intrinsic :: iso_c_binding, only : c_bool, c_int, c_ptr
     import :: c_rp
     ! Scalars (by reference)
     logical(c_bool) :: zero_initial
     integer(c_int) :: n
     real(c_rp) :: inv_thet
     ! Device pointers and stream handle (by value)
     type(c_ptr), value :: r_d, f_d, w_d, x_d, d_d
     type(c_ptr), value :: strm
   end subroutine metal_amg_cheby_solve_part1
end interface
101
interface
   !> Interface to the C routine `metal_amg_cheby_solve_part2`.
   !! Pointer-like handles are passed by value, the scalars `tmp1`,
   !! `tmp2` and `n` by reference.
   subroutine metal_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, &
        n, strm) &
        bind(c, name='metal_amg_cheby_solve_part2')
     use, intrinsic :: iso_c_binding, only : c_int, c_ptr
     import :: c_rp
     ! Scalars (by reference)
     integer(c_int) :: n
     real(c_rp) :: tmp1
     real(c_rp) :: tmp2
     ! Device pointers and stream handle (by value)
     type(c_ptr), value :: r_d, w_d, d_d, x_d
     type(c_ptr), value :: strm
   end subroutine metal_amg_cheby_solve_part2
end interface
112#elif HAVE_OPENCL
113#endif
114
116
117contains
118
!> Backend dispatch for the first part of the device Chebyshev solve.
!! Forwards all arguments, together with the global command queue
!! `glb_cmd_queue`, to the `*_amg_cheby_solve_part1` routine of the
!! backend selected at compile time (HIP, CUDA or Metal). If none of
!! these backends is enabled, `neko_error` is called instead.
!! @param r_d, f_d, w_d, x_d, d_d Device pointers, handed through
!! unchanged to the backend routine.
!! @param inv_thet Real scalar forwarded to the backend routine.
!! @param n Integer forwarded to the backend routine (presumably the
!! number of entries to process; confirm against the kernel).
!! @param zero_initial Flag forwarded to the backend routine after
!! conversion to a C-interoperable logical.
subroutine amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, &
     inv_thet, n, zero_initial)
  type(c_ptr), intent(inout) :: r_d, f_d, w_d, d_d, x_d
  real(kind=rp), intent(in) :: inv_thet
  integer, intent(in) :: n
  logical, intent(in) :: zero_initial
  ! C-interoperable copy of zero_initial
  logical(c_bool) :: zero_initial_c

  ! The backend interfaces expect logical(c_bool), not a default logical
  zero_initial_c = logical(zero_initial, kind=c_bool)

#ifdef HAVE_HIP
  call hip_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, &
       zero_initial_c, glb_cmd_queue)
#elif HAVE_CUDA
  call cuda_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, &
       zero_initial_c, glb_cmd_queue)
#elif HAVE_METAL
  call metal_amg_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, &
       zero_initial_c, glb_cmd_queue)
#else
  call neko_error('No device backend configured')
#endif
end subroutine amg_device_cheby_solve_part1
142
!> Backend dispatch for the second part of the device Chebyshev solve.
!! Forwards all arguments, together with the global command queue
!! `glb_cmd_queue`, to the `*_amg_cheby_solve_part2` routine of the
!! backend selected at compile time (HIP, CUDA or Metal). If none of
!! these backends is enabled, `neko_error` is called instead.
!! @param r_d, w_d, d_d, x_d Device pointers, handed through unchanged
!! to the backend routine.
!! @param tmp1, tmp2 Real scalars forwarded to the backend routine.
!! @param n Integer forwarded to the backend routine (presumably the
!! number of entries to process; confirm against the kernel).
subroutine amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
  type(c_ptr), intent(inout) :: r_d, w_d, d_d, x_d
  real(kind=rp), intent(in) :: tmp1, tmp2
  integer, intent(in) :: n

#ifdef HAVE_HIP
  call hip_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n, &
       glb_cmd_queue)
#elif HAVE_CUDA
  call cuda_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n, &
       glb_cmd_queue)
#elif HAVE_METAL
  call metal_amg_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n, &
       glb_cmd_queue)
#else
  call neko_error('No device backend configured')
#endif
end subroutine amg_device_cheby_solve_part2
160
void cuda_amg_cheby_solve_part2(void *r, void *w, void *d, void *x, real *tmp1, real *tmp2, int *n, cudaStream_t strm)
Definition amg_cheby.cu:100
void cuda_amg_cheby_solve_part1(void *r, void *f, void *w, void *x, void *d, real *inv_thet, int *n, bool *zero_initial, cudaStream_t strm)
Definition amg_cheby.cu:85
Implements device kernels for use with TreeAMG smoothers.
subroutine, public amg_device_cheby_solve_part1(r_d, f_d, w_d, x_d, d_d, inv_thet, n, zero_initial)
subroutine, public amg_device_cheby_solve_part2(r_d, w_d, d_d, x_d, tmp1, tmp2, n)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:52
integer, parameter, public c_rp
Definition num_types.f90:13
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Utilities.
Definition utils.f90:35