Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
legendre_rst_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
35 use num_types, only: rp, dp, xp
38 use space, only: space_t
39 use utils, only: neko_error, neko_warning
40 use vector, only: vector_t
41 use matrix, only: matrix_t
42 use math, only: neko_eps, matinv39
45 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr
48 use device_math, only: device_vlsc3
49 implicit none
50 private
51
54 type, public :: legendre_rst_finder_t
55 type(vector_t) :: x_hat, y_hat, z_hat
56 type(space_t), pointer :: xh => null()
57 integer :: nelv = 0
58 real(kind=rp) :: tol = neko_eps
59 integer :: max_iter = 10
60 contains
61 procedure, pass(this) :: init => legendre_rst_finder_init
62 procedure, pass(this) :: free => legendre_rst_finder_free
63 procedure, pass(this) :: find => legendre_rst_finder_find
65
66contains
67
68 subroutine legendre_rst_finder_init(this, x, y, z, nelv, Xh, tol, max_iter)
69 class(legendre_rst_finder_t), intent(inout) :: this
70 type(space_t), target, intent(in) :: Xh
71 integer, intent(in) :: nelv
72 real(kind=rp), intent(in), dimension(nelv*Xh%lxyz) :: x, y, z
73 real(kind=rp), intent(in), optional :: tol
74 integer, intent(in), optional :: max_iter
75
76 call this%free()
77
78 if (present(tol)) then
79 if (tol < neko_eps) then
80 call neko_error('Tolerance for the Legendre finder is too small')
81 end if
82 this%tol = tol
83
84 else
85 this%tol = neko_eps
86 end if
87 if (present(max_iter)) then
88 if (max_iter < 1) then
89 call neko_error('Max iterations for the Legendre finder is too small')
90 end if
91 this%max_iter = max_iter
92 else
93 this%max_iter = 10
94 end if
95
96 this%Xh => xh
97 this%nelv = nelv
98
99 call this%x_hat%init(nelv*xh%lxyz)
100 call this%y_hat%init(nelv*xh%lxyz)
101 call this%z_hat%init(nelv*xh%lxyz)
102
103 call tnsr3d_cpu(this%x_hat%x, xh%lx, x, &
104 xh%lx, xh%vinv, &
105 xh%vinvt, xh%vinvt, nelv)
106 call tnsr3d_cpu(this%y_hat%x, xh%lx, y, &
107 xh%lx, xh%vinv, &
108 xh%vinvt, xh%vinvt, nelv)
109 call tnsr3d_cpu(this%z_hat%x, xh%lx, z, &
110 xh%lx, xh%vinv, &
111 xh%vinvt, xh%vinvt, nelv)
112
114 call this%x_hat%copy_from(host_to_device, .false.)
115 call this%y_hat%copy_from(host_to_device, .false.)
116 call this%z_hat%copy_from(host_to_device, .false.)
117
118 end subroutine legendre_rst_finder_init
119
121 class(legendre_rst_finder_t), intent(inout) :: this
122
123 call this%x_hat%free()
124 call this%y_hat%free()
125 call this%z_hat%free()
126
127 this%Xh => null()
128
129 end subroutine legendre_rst_finder_free
130
146 subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, &
147 el_cands, n_point_cand, resx, resy, resz)
148 class(legendre_rst_finder_t), intent(inout) :: this
149 type(matrix_t), intent(inout) :: rst_local_cand
150 type(vector_t), intent(in) :: x_t, y_t, z_t
151 integer, intent(in) :: n_point_cand
152 integer, intent(inout) :: el_cands(:)
153 type(vector_t), intent(inout) :: resx, resy, resz
154 type(c_ptr) :: el_cands_d = c_null_ptr
155
157 if (n_point_cand .lt. 1) return
158
159 rst_local_cand = 0.0_rp
160
161 if (neko_bcknd_device .eq. 1) then
162 el_cands_d = device_get_ptr(el_cands)
163 call find_rst_legendre_device(this, rst_local_cand%x_d, &
164 x_t%x_d, y_t%x_d, z_t%x_d, &
165 el_cands_d, n_point_cand, &
166 resx%x_d, resy%x_d, resz%x_d)
167
168 else
169 call find_rst_legendre_cpu(this, rst_local_cand%x, x_t%x, y_t%x, z_t%x, &
170 el_cands, n_point_cand, &
171 resx%x, resy%x, resz%x)
172
173 end if
174
175 end subroutine legendre_rst_finder_find
176
183 subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, &
184 el_list, n_pts, resx, resy, resz)
185 type(legendre_rst_finder_t), intent(inout) :: this
186 type(c_ptr), intent(inout) :: rst
187 type(c_ptr), intent(in) :: pt_x, pt_y, pt_z
188 type(c_ptr), intent(in) :: el_list
189 type(c_ptr), intent(inout) :: resx, resy, resz
190 integer, intent(in) :: n_pts
191 type(vector_t) :: conv_pts
192 integer :: iter
193 logical :: converged
194 real(kind=rp) :: conv_sum
195
196 if (n_pts .eq. 0) return
197
198 call conv_pts%init(n_pts)
199
200 conv_pts = 1.0_rp
201
202 iter = 0
203 converged = .false.
204 !Iterate until found, not heavily optimized
205 do while (.not. converged)
206 iter = iter + 1
207 call device_find_rst_legendre(rst, pt_x, pt_y, pt_z, &
208 this%x_hat%x_d, this%y_hat%x_d, this%z_hat%x_d, &
209 resx, resy, resz, &
210 this%Xh%lx,el_list, n_pts, this%tol, &
211 conv_pts%x_d)
212 !This can be made more approriate... avoid memcpy at least
213 conv_sum = device_vlsc3(conv_pts%x_d,conv_pts%x_d,conv_pts%x_d,n_pts)
214 converged = conv_sum .lt. 0.5
215 !print *, conv_sum
216 if( iter .ge. this%max_iter) converged = .true.
217 end do
218
219 call conv_pts%free()
220 end subroutine find_rst_legendre_device
221
228 subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, &
229 el_list, n_pts, resx, resy, resz)
230 type(legendre_rst_finder_t), intent(inout) :: this
231 integer, intent(in) :: n_pts
232 real(kind=rp), intent(inout) :: rst(3, n_pts)
233 real(kind=rp), intent(in) :: pt_x(n_pts)
234 real(kind=rp), intent(in) :: pt_y(n_pts)
235 real(kind=rp), intent(in) :: pt_z(n_pts)
236 real(kind=rp), intent(inout) :: resx(n_pts)
237 real(kind=rp), intent(inout) :: resy(n_pts)
238 real(kind=rp), intent(inout) :: resz(n_pts)
239 integer, intent(in) :: el_list(n_pts)
240 real(kind=rp) :: r_legendre(1, this%Xh%lx)
241 real(kind=rp) :: s_legendre(this%Xh%lx, 1)
242 real(kind=rp) :: t_legendre(this%Xh%lx, 1)
243 real(kind=rp) :: dr_legendre(1, this%Xh%lx)
244 real(kind=rp) :: ds_legendre(this%Xh%lx, 1)
245 real(kind=rp) :: dt_legendre(this%Xh%lx, 1)
246 real(kind=rp) :: jac(3,3)
247 real(kind=xp) :: rst_d(3), jacinv(3,3)
248 integer :: conv_pts
249 logical :: converged
250 integer :: i, j, e, iter, lx
251
252
253
254 lx = this%Xh%lx
255 if (n_pts .lt. 1) return
256
257 rst = 0.0_rp
258 ! If performance critical we should do multiple points at the time
259 ! Currently we do one point at the time
260 do i = 1, n_pts
261 iter = 0
262 converged = .false.
263 do while (.not. converged)
264 iter = iter + 1
265 ! Compute legendre polynomials in this rst coordinate
266 r_legendre(1, 1) = 1.0
267 r_legendre(1, 2) = rst(1,i)
268 s_legendre(1, 1) = 1.0
269 s_legendre(2, 1) = rst(2,i)
270 t_legendre(1, 1) = 1.0
271 t_legendre(2, 1) = rst(3,i)
272 dr_legendre(1, 1) = 0.0
273 dr_legendre(1, 2) = 1.0
274 ds_legendre(1, 1) = 0.0
275 ds_legendre(2, 1) = 1.0
276 dt_legendre(1, 1) = 0.0
277 dt_legendre(2, 1) = 1.0
278 do j = 2, lx-1
279 r_legendre(1, j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(1,i) &
280 * r_legendre(1, j) - (j-1.0_xp) &
281 * r_legendre(1, j-1)) / (real(j,xp))
282 s_legendre(j+1, 1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(2,i) &
283 * s_legendre(j, 1) - (j-1.0_xp) &
284 * s_legendre(j-1, 1))/(real(j,xp))
285 t_legendre(j+1, 1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(3,i) &
286 * t_legendre(j, 1) - (j-1.0_xp) &
287 * t_legendre(j-1, 1))/(real(j,xp))
288 dr_legendre(1, j+1) = ((j-1.0_xp)+1.0_xp) * r_legendre(1, j) &
289 + rst(1,i)*dr_legendre(1, j)
290 ds_legendre(j+1, 1) = ((j-1.0_xp)+1.0_xp) * s_legendre(j, 1) &
291 + rst(2,i)*ds_legendre(j, 1)
292 dt_legendre(j+1, 1) = ((j-1.0_xp)+1.0_xp) * t_legendre(j, 1) &
293 + rst(3,i)*dt_legendre(j, 1)
294 end do
295 e = (el_list(i))*this%Xh%lxyz + 1
296 ! Compute the current xyz value
297 call tnsr3d_el_cpu(resx(i), 1, this%x_hat%x(e), lx, &
298 r_legendre, s_legendre, t_legendre)
299 call tnsr3d_el_cpu(resy(i), 1, this%y_hat%x(e), lx, &
300 r_legendre, s_legendre, t_legendre)
301 call tnsr3d_el_cpu(resz(i), 1, this%z_hat%x(e), lx, &
302 r_legendre, s_legendre, t_legendre)
303 ! This should in principle be merged into some larger kernel
304 ! Compute the jacobian
305 call tnsr3d_el_cpu(jac(1,1), 1, this%x_hat%x(e), lx, &
306 dr_legendre, s_legendre, t_legendre)
307 call tnsr3d_el_cpu(jac(1,2), 1, this%y_hat%x(e), lx, &
308 dr_legendre, s_legendre, t_legendre)
309 call tnsr3d_el_cpu(jac(1,3), 1, this%z_hat%x(e), lx, &
310 dr_legendre, s_legendre, t_legendre)
311 call tnsr3d_el_cpu(jac(2,1), 1, this%x_hat%x(e), lx, &
312 r_legendre, ds_legendre, t_legendre)
313 call tnsr3d_el_cpu(jac(2,2), 1, this%y_hat%x(e), lx, &
314 r_legendre, ds_legendre, t_legendre)
315 call tnsr3d_el_cpu(jac(2,3), 1, this%z_hat%x(e), lx, &
316 r_legendre, ds_legendre, t_legendre)
317 call tnsr3d_el_cpu(jac(3,1), 1, this%x_hat%x(e), lx, &
318 r_legendre, s_legendre, dt_legendre)
319 call tnsr3d_el_cpu(jac(3,2), 1, this%y_hat%x(e), lx, &
320 r_legendre, s_legendre, dt_legendre)
321 call tnsr3d_el_cpu(jac(3,3), 1, this%z_hat%x(e), lx, &
322 r_legendre, s_legendre, dt_legendre)
323 resx(i) = pt_x(i) - resx(i)
324 resy(i) = pt_y(i) - resy(i)
325 resz(i) = pt_z(i) - resz(i)
326 ! Jacobian inverse
327 jacinv = matinv39(jac(1,1), jac(1,2), jac(1,3),&
328 jac(2,1), jac(2,2), jac(2,3),&
329 jac(3,1), jac(3,2), jac(3,3))
330 ! Update direction
331 rst_d(1) = (resx(i)*jacinv(1,1) &
332 + jacinv(2,1)*resy(i) &
333 + jacinv(3,1)*resz(i))
334 rst_d(2) = (resx(i)*jacinv(1,2) &
335 + jacinv(2,2)*resy(i) &
336 + jacinv(3,2)*resz(i))
337 rst_d(3) = (resx(i)*jacinv(1,3) &
338 + jacinv(2,3)*resy(i) &
339 + jacinv(3,3)*resz(i))
340
341 conv_pts = 0
342 if (norm2(real(rst_d,xp)) .le. this%tol) then
343 conv_pts = 1
344 end if
345 if (norm2(real(rst_d,xp)) .gt. 4.0) then
346 conv_pts = 1
347 end if
348 ! Update rst coordinates
349 rst(1,i) = rst(1,i) + rst_d(1)
350 rst(2,i) = rst(2,i) + rst_d(2)
351 rst(3,i) = rst(3,i) + rst_d(3)
352
353 converged = conv_pts .eq. 1
354 if (iter .ge. this%max_iter) converged = .true.
355 end do
356 end do
357 end subroutine find_rst_legendre_cpu
358end module legendre_rst_finder
double real
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Copy data between host and device (or device and device)
Definition device.F90:71
subroutine, public device_find_rst_legendre(rst_d, pt_x_d, pt_y_d, pt_z_d, x_hat_d, y_hat_d, z_hat_d, resx_d, resy_d, resz_d, lx, el_ids_d, n_pts, tol, conv_pts_d)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates on GPU.
subroutine legendre_rst_finder_init(this, x, y, z, nelv, xh, tol, max_iter)
subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, el_cands, n_point_cand, resx, resy, resz)
Given a set of element candidates containing the given points and computes the local RST coordinates ...
subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates.
subroutine legendre_rst_finder_free(this)
Definition math.f90:60
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1486
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
Defines a matrix.
Definition matrix.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Defines a vector.
Definition vector.f90:34
Type to compute local element (rst) coordinates for a gives set points in physical (xyz) space on a S...
The function space for the SEM solution fields.
Definition space.f90:63