Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
pc_jacobi_device.F90
Go to the documentation of this file.
1! Copyright (c) 2021-2025, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
35 use precon, only : pc_t
36 use coefs, only : coef_t
37 use dofmap, only : dofmap_t
38 use num_types, only : rp
43 use gather_scatter, only : gs_t, gs_op_add
44 use, intrinsic :: iso_c_binding, only : c_ptr, c_null_ptr, c_associated
45 implicit none
46 private
47
49 type, public, extends(pc_t) :: device_jacobi_t
50 real(kind=rp), allocatable :: d(:,:,:,:)
51 type(c_ptr) :: d_d = c_null_ptr
52 type(gs_t), pointer :: gs_h
53 type(dofmap_t), pointer :: dof
54 type(coef_t), pointer :: coef
55 type(c_ptr) :: gs_event = c_null_ptr
56 contains
57 procedure, pass(this) :: init => device_jacobi_init
58 procedure, pass(this) :: free => device_jacobi_free
59 procedure, pass(this) :: solve => device_jacobi_solve
60 procedure, pass(this) :: update => device_jacobi_update
61 end type device_jacobi_t
62
63 interface
64 subroutine hip_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
65 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
66 bind(c, name='hip_jacobi_update')
67 use, intrinsic :: iso_c_binding
68 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
69 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
70 integer(c_int) :: nelv, lx
71 end subroutine hip_jacobi_update
72 end interface
73
74 interface
75 subroutine cuda_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
76 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
77 bind(c, name='cuda_jacobi_update')
78 use, intrinsic :: iso_c_binding
79 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
80 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
81 integer(c_int) :: nelv, lx
82 end subroutine cuda_jacobi_update
83 end interface
84
85 interface
86 subroutine opencl_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
87 G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
88 bind(c, name='opencl_jacobi_update')
89 use, intrinsic :: iso_c_binding
90 type(c_ptr), value :: d_d, dxt_d, dyt_d, dzt_d
91 type(c_ptr), value :: G11_d, G22_d, G33_d, G12_d, G13_d, G23_d
92 integer(c_int) :: nelv, lx
93 end subroutine opencl_jacobi_update
94 end interface
95
96contains
97
98 subroutine device_jacobi_init(this, coef, dof, gs_h)
99 class(device_jacobi_t), intent(inout) :: this
100 type(coef_t), intent(in), target :: coef
101 type(dofmap_t), intent(in), target :: dof
102 type(gs_t), intent(inout), target :: gs_h
103
104 call this%free()
105
106 this%gs_h => gs_h
107 this%dof => dof
108 this%coef => coef
109
110 allocate(this%d(dof%Xh%lx,dof%Xh%ly,dof%Xh%lz, dof%msh%nelv))
111
112 call device_map(this%d, this%d_d, size(this%d))
113
114 call device_event_create(this%gs_event, 2)
115
116 call device_jacobi_update(this)
117
118 end subroutine device_jacobi_init
119
120 subroutine device_jacobi_free(this)
121 class(device_jacobi_t), intent(inout) :: this
122
123 if (allocated(this%d)) then
124 if (c_associated(this%d_d)) then
125 call device_unmap(this%d, this%d_d)
126 end if
127 deallocate(this%d)
128 end if
129
130 if (c_associated(this%gs_event)) then
131 call device_event_destroy(this%gs_event)
132 end if
133
134 nullify(this%dof)
135 nullify(this%gs_h)
136 nullify(this%coef)
137 end subroutine device_jacobi_free
138
141 subroutine device_jacobi_solve(this, z, r, n)
142 integer, intent(in) :: n
143 class(device_jacobi_t), intent(inout) :: this
144 real(kind=rp), dimension(n), intent(inout) :: z
145 real(kind=rp), dimension(n), intent(inout) :: r
146 type(c_ptr) :: z_d, r_d
147
148 z_d = device_get_ptr(z)
149 r_d = device_get_ptr(r)
150
151 call device_col3(z_d, r_d, this%d_d, n)
152
153 end subroutine device_jacobi_solve
154
155 subroutine device_jacobi_update(this)
156 class(device_jacobi_t), intent(inout) :: this
157 integer :: lz, ly, lx
158 associate(dof => this%dof, coef => this%coef, xh => this%dof%Xh, &
159 gs_h => this%gs_h, nelv => this%dof%msh%nelv)
160
161 lx = xh%lx
162 ly = xh%ly
163 lz = xh%lz
164
165#ifdef HAVE_HIP
166 call hip_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
167 coef%G11_d, coef%G22_d, coef%G33_d, &
168 coef%G12_d, coef%G13_d, coef%G23_d, &
169 nelv, lx)
170#elif HAVE_CUDA
171 call cuda_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
172 coef%G11_d, coef%G22_d, coef%G33_d, &
173 coef%G12_d, coef%G13_d, coef%G23_d, &
174 nelv, lx)
175#elif HAVE_OPENCL
176 call opencl_jacobi_update(this%d_d, xh%dxt_d, xh%dyt_d, xh%dzt_d, &
177 coef%G11_d, coef%G22_d, coef%G33_d, &
178 coef%G12_d, coef%G13_d, coef%G23_d, &
179 nelv, lx)
180#endif
181
182 call device_col2(this%d_d, coef%h1_d, coef%dof%size())
183
184 if (coef%ifh2) then
185 call device_addcol3(this%d_d, coef%h2_d, coef%B_d, coef%dof%size())
186 end if
187
188 call gs_h%op(this%d, dof%size(), gs_op_add, this%gs_event)
189 call device_event_sync(this%gs_event)
190
191 call device_invcol1(this%d_d, dof%size())
192 end associate
193 end subroutine device_jacobi_update
194
195end module device_jacobi
__device__ T solve(const T u, const T y, const T guess, const T nu, const T kappa, const T B)
Return the device pointer for an associated Fortran array.
Definition device.F90:107
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:83
Coefficients.
Definition coef.f90:34
Jacobi preconditioner accelerator backend.
subroutine device_jacobi_free(this)
subroutine device_jacobi_init(this, coef, dof, gs_h)
subroutine device_jacobi_update(this)
subroutine device_jacobi_solve(this, z, r, n)
The jacobi preconditioner where .
subroutine, public device_addcol3(a_d, b_d, c_d, n, strm)
Returns .
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_col2(a_d, b_d, n, strm)
Vector multiplication .
subroutine, public device_col3(a_d, b_d, c_d, n, strm)
Vector multiplication with 3 vectors .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_event_sync(event)
Synchronize an event.
Definition device.F90:1515
subroutine, public device_event_destroy(event)
Destroy a device event.
Definition device.F90:1480
subroutine, public device_event_create(event, flags)
Create a device event queue.
Definition device.F90:1450
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Krylov preconditioner.
Definition precon.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
Defines a jacobi preconditioner.
Defines a canonical Krylov preconditioner.
Definition precon.f90:40