Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pc_jacobi_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use precon, only : pc_t
36 use coefs, only : coef_t
37 use dofmap, only : dofmap_t
38 use num_types, only : rp
43 use gather_scatter, only : gs_t, gs_op_add
44 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
45 implicit none
46 private
47
49 type, public, extends(pc_t) :: device_jacobi_t
50 real(kind=rp), allocatable :: d(:,:,:,:)
51 type(c_ptr) :: d_d = c_null_ptr
52 type(gs_t), pointer :: gs_h
53 type(dofmap_t), pointer :: dof
54 type(coef_t), pointer :: coef
55 type(c_ptr) :: gs_event = c_null_ptr
56 contains
57 procedure, pass(this) :: init => device_jacobi_init
58 procedure, pass(this) :: free => device_jacobi_free
59 procedure, pass(this) :: solve => device_jacobi_solve
60 procedure, pass(this) :: update => device_jacobi_update
61 end type device_jacobi_t
62
63 interface
64 subroutine hip_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
65 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
66 bind(c, name='hip_jacobi_update')
67 use, intrinsic :: iso_c_binding
68 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
69 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
70 integer(c_int) :: nelv, lx
71 end subroutine hip_jacobi_update
72 end interface
73
74 interface
75 subroutine cuda_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
76 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
77 bind(c, name='cuda_jacobi_update')
78 use, intrinsic :: iso_c_binding
79 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
80 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
81 integer(c_int) :: nelv, lx
82 end subroutine cuda_jacobi_update
83 end interface
84
85 interface
86 subroutine opencl_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
87 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
88 bind(c, name='opencl_jacobi_update')
89 use, intrinsic :: iso_c_binding
90 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
91 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
92 integer(c_int) :: nelv, lx
93 end subroutine opencl_jacobi_update
94 end interface
95
96 interface
97 subroutine metal_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
98 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
99 bind(c, name='metal_jacobi_update')
100 use, intrinsic :: iso_c_binding
101 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
102 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
103 integer(c_int) :: nelv, lx
104 end subroutine metal_jacobi_update
105 end interface
106
107contains
108
109 subroutine device_jacobi_init(this, coef, dof, gs_h)
110 class(device_jacobi_t), intent(inout) :: this
111 type(coef_t), intent(in), target :: coef
112 type(dofmap_t), intent(in), target :: dof
113 type(gs_t), intent(inout), target :: gs_h
114
115 call this%free()
116
117 this%gs_h => gs_h
118 this%dof => dof
119 this%coef => coef
120
121 allocate(this%d(dof%Xh%lx,dof%Xh%ly,dof%Xh%lz, dof%msh%nelv))
122
123 call device_map(this%d, this%d_d, size(this%d))
124
125 call device_event_create(this%gs_event, 2)
126
127 call device_jacobi_update(this)
128
129 end subroutine device_jacobi_init
130
131 subroutine device_jacobi_free(this)
132 class(device_jacobi_t), intent(inout) :: this
133
134 if (allocated(this%d)) then
135 if (c_associated(this%d_d)) then
136 call device_unmap(this%d, this%d_d)
137 end if
138 deallocate(this%d)
139 end if
140
141 if (c_associated(this%gs_event)) then
142 call device_event_destroy(this%gs_event)
143 end if
144
145 nullify(this%dof)
146 nullify(this%gs_h)
147 nullify(this%coef)
148 end subroutine device_jacobi_free
149
152 subroutine device_jacobi_solve(this, z, r, n)
153 integer, intent(in) :: n
154 class(device_jacobi_t), intent(inout) :: this
155 real(kind=rp), dimension(n), intent(inout) :: z
156 real(kind=rp), dimension(n), intent(inout) :: r
157 type(c_ptr) :: z_d, r_d
158
159 z_d = device_get_ptr(z)
160 r_d = device_get_ptr(r)
161
162 call device_col3(z_d, r_d, this%d_d, n)
163
164 end subroutine device_jacobi_solve
165
166 subroutine device_jacobi_update(this)
167 class(device_jacobi_t), intent(inout) :: this
168 integer :: lz, ly, lx
169 associate(dof => this%dof, coef => this%coef, xh => this%dof%Xh, &
170 gs_h => this%gs_h, nelv => this%dof%msh%nelv)
171
172 lx = xh%lx
173 ly = xh%ly
174 lz = xh%lz
175
176#ifdef HAVE_HIP
177 call hip_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
178 coef%G11_d, coef%G22_d, coef%G33_d, &
179 coef%G12_d, coef%G13_d, coef%G23_d, &
180 nelv, lx)
181#elif HAVE_CUDA
182 call cuda_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
183 coef%G11_d, coef%G22_d, coef%G33_d, &
184 coef%G12_d, coef%G13_d, coef%G23_d, &
185 nelv, lx)
186#elif HAVE_OPENCL
187 call opencl_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
188 coef%G11_d, coef%G22_d, coef%G33_d, &
189 coef%G12_d, coef%G13_d, coef%G23_d, &
190 nelv, lx)
191#elif HAVE_METAL
192 call metal_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
193 coef%G11_d, coef%G22_d, coef%G33_d, &
194 coef%G12_d, coef%G13_d, coef%G23_d, &
195 nelv, lx)
196#endif
197
198 call device_col2(this%d_d, coef%h1_d, coef%dof%size())
199
200 if (coef%ifh2) then
201 call device_addcol3(this%d_d, coef%h2_d, coef%B_d, coef%dof%size())
202 end if
203
204 call gs_h%op(this%d, dof%size(), gs_op_add, this%gs_event)
205 call device_event_sync(this%gs_event)
206
207 call device_invcol1(this%d_d, dof%size())
208 end associate
209 end subroutine device_jacobi_update
210
211end module device_jacobi
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:108
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Coefficients.
Definition coef.f90:34
Jacobi preconditioner accelerator backend.
subroutine device_jacobi_free(this)
subroutine device_jacobi_init(this, coef, dof, gs_h)
subroutine device_jacobi_update(this)
subroutine device_jacobi_solve(this, z, r, n)
The jacobi preconditioner where .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1594
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1550
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1516
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Defines a jacobi preconditioner.
Defines a canonical Krylov preconditioner.
Definition precon.f90:40