Neko 1.99.1
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
legendre_rst_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
35 use num_types, only: rp, dp, xp, i8
38 use space, only: space_t
39 use utils, only: neko_error, neko_warning
40 use vector, only: vector_t
41 use matrix, only: matrix_t
42 use tensor, only: tnsr3d
43 use math, only: neko_eps, matinv39
46 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
47 c_sizeof, c_loc, c_bool
50 use device_math, only: device_vlsc3
51 implicit none
52 private
53
56 type, public :: legendre_rst_finder_t
57 type(vector_t) :: x_hat, y_hat, z_hat
58 type(space_t), pointer :: xh => null()
59 integer :: nelv = 0
60 real(kind=rp) :: tol = neko_eps
61 integer :: max_iter = 10
62 contains
63 procedure, pass(this) :: init => legendre_rst_finder_init
64 procedure, pass(this) :: free => legendre_rst_finder_free
65 procedure, pass(this) :: find => legendre_rst_finder_find
67
68contains
69
70 subroutine legendre_rst_finder_init(this, x, y, z, nelv, Xh, tol, max_iter)
71 class(legendre_rst_finder_t), intent(inout) :: this
72 type(space_t), target, intent(in) :: Xh
73 integer, intent(in) :: nelv
74 real(kind=rp), intent(in), dimension(nelv*Xh%lxyz) :: x, y, z
75 real(kind=rp), intent(in), optional :: tol
76 integer, intent(in), optional :: max_iter
77
78 call this%free()
79
80 if (present(tol)) then
81 if (tol < neko_eps) then
82 call neko_error('Tolerance for the Legendre finder is too small')
83 end if
84 this%tol = tol
85
86 else
87 this%tol = neko_eps
88 end if
89 if (present(max_iter)) then
90 if (max_iter < 1) then
91 call neko_error('Max iterations for the Legendre finder is too small')
92 end if
93 this%max_iter = max_iter
94 else
95 this%max_iter = 10
96 end if
97
98 this%Xh => xh
99 this%nelv = nelv
100
101 call this%x_hat%init(nelv*xh%lxyz)
102 call this%y_hat%init(nelv*xh%lxyz)
103 call this%z_hat%init(nelv*xh%lxyz)
104
105 call tnsr3d_cpu(this%x_hat%x, xh%lx, x, &
106 xh%lx, xh%vinv, &
107 xh%vinvt, xh%vinvt, nelv)
108 call tnsr3d_cpu(this%y_hat%x, xh%lx, y, &
109 xh%lx, xh%vinv, &
110 xh%vinvt, xh%vinvt, nelv)
111 call tnsr3d_cpu(this%z_hat%x, xh%lx, z, &
112 xh%lx, xh%vinv, &
113 xh%vinvt, xh%vinvt, nelv)
114
116 call this%x_hat%copy_from(host_to_device,.false.)
117 call this%y_hat%copy_from(host_to_device,.false.)
118 call this%z_hat%copy_from(host_to_device,.false.)
119
120 end subroutine legendre_rst_finder_init
121
123 class(legendre_rst_finder_t), intent(inout) :: this
124
125 call this%x_hat%free()
126 call this%y_hat%free()
127 call this%z_hat%free()
128
129 this%Xh => null()
130
131 end subroutine legendre_rst_finder_free
132
148 subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, &
149 el_cands, n_point_cand, resx, resy, resz)
150 class(legendre_rst_finder_t), intent(inout) :: this
151 type(matrix_t), intent(inout) :: rst_local_cand
152 type(vector_t), intent(in) :: x_t, y_t, z_t
153 integer, intent(in) :: n_point_cand
154 integer, intent(inout) :: el_cands(:)
155 type(vector_t), intent(inout) :: resx, resy, resz
156 type(c_ptr) :: el_cands_d = c_null_ptr
157
159 if (n_point_cand .lt. 1) return
160
161 rst_local_cand = 0.0_rp
162
163 if (neko_bcknd_device .eq. 1) then
164 el_cands_d = device_get_ptr(el_cands)
165 call find_rst_legendre_device(this, rst_local_cand%x_d, &
166 x_t%x_d, y_t%x_d, z_t%x_d, &
167 el_cands_d, n_point_cand, &
168 resx%x_d, resy%x_d, resz%x_d)
169
170 else
171 call find_rst_legendre_cpu(this, rst_local_cand%x, x_t%x, y_t%x, z_t%x, &
172 el_cands, n_point_cand, &
173 resx%x, resy%x, resz%x)
174
175 end if
176
177 end subroutine legendre_rst_finder_find
178
185 subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, &
186 el_list, n_pts, resx, resy, resz)
187 type(legendre_rst_finder_t), intent(inout) :: this
188 type(c_ptr), intent(inout) :: rst
189 type(c_ptr), intent(in) :: pt_x, pt_y, pt_z
190 type(c_ptr), intent(in) :: el_list
191 type(c_ptr), intent(inout) :: resx, resy, resz
192 integer, intent(in) :: n_pts
193 type(vector_t) :: conv_pts
194 integer :: i, iter
195 logical :: converged
196 real(kind=rp) :: conv_sum
197
198 if (n_pts .eq. 0) return
199
200 call conv_pts%init(n_pts)
201
202 conv_pts = 1.0_rp
203
204 iter = 0
205 converged = .false.
206 !Iterate until found, not heavily optimized
207 do while (.not. converged)
208 call device_find_rst_legendre(rst, pt_x, pt_y, pt_z, &
209 this%x_hat%x_d, this%y_hat%x_d, this%z_hat%x_d, &
210 resx, resy, resz, &
211 this%Xh%lx,el_list, n_pts, this%tol, &
212 conv_pts%x_d)
213 !This can be made more approriate... avoid memcpy at least
214 conv_sum = device_vlsc3(conv_pts%x_d,conv_pts%x_d,conv_pts%x_d,n_pts)
215 converged = conv_sum .lt. 0.5
216 print *, conv_sum
217 if( iter .ge. this%max_iter) converged = .true.
218 end do
219
220 call conv_pts%free()
221 end subroutine find_rst_legendre_device
222
229 subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, &
230 el_list, n_pts, resx, resy, resz)
231 type(legendre_rst_finder_t), intent(inout) :: this
232 integer, intent(in) :: n_pts
233 real(kind=rp), intent(inout) :: rst(3, n_pts)
234 real(kind=rp), intent(in) :: pt_x(n_pts)
235 real(kind=rp), intent(in) :: pt_y(n_pts)
236 real(kind=rp), intent(in) :: pt_z(n_pts)
237 real(kind=rp), intent(inout) :: resx(n_pts)
238 real(kind=rp), intent(inout) :: resy(n_pts)
239 real(kind=rp), intent(inout) :: resz(n_pts)
240 integer, intent(in) :: el_list(n_pts)
241 real(kind=rp) :: r_legendre(this%Xh%lx)
242 real(kind=rp) :: s_legendre(this%Xh%lx)
243 real(kind=rp) :: t_legendre(this%Xh%lx)
244 real(kind=rp) :: dr_legendre(this%Xh%lx)
245 real(kind=rp) :: ds_legendre(this%Xh%lx)
246 real(kind=rp) :: dt_legendre(this%Xh%lx)
247 real(kind=rp) :: jac(3,3)
248 real(kind=xp) :: tmp(this%Xh%lx), tmp2(this%Xh%lx)
249 real(kind=xp) :: rst_d(3), jacinv(3,3)
250 integer :: conv_pts
251 logical :: converged
252 integer :: i, j, e, iter, lx, lx2
253
254
255
256 lx = this%Xh%lx
257 if (n_pts .lt. 1) return
258
259 rst = 0.0_rp
260 ! If performance critical we should do multiple points at the time
261 ! Currently we do one point at the time
262 do i = 1, n_pts
263 iter = 0
264 converged = .false.
265 do while (.not. converged)
266 iter = iter + 1
267 ! Compute legendre polynomials in this rst coordinate
268 r_legendre(1) = 1.0
269 r_legendre(2) = rst(1,i)
270 s_legendre(1) = 1.0
271 s_legendre(2) = rst(2,i)
272 t_legendre(1) = 1.0
273 t_legendre(2) = rst(3,i)
274 dr_legendre(1) = 0.0
275 dr_legendre(2) = 1.0
276 ds_legendre(1) = 0.0
277 ds_legendre(2) = 1.0
278 dt_legendre(1) = 0.0
279 dt_legendre(2) = 1.0
280 do j = 2, lx-1
281 r_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(1,i) &
282 * r_legendre(j) - (j-1.0_xp) &
283 * r_legendre(j-1)) / (real(j,xp))
284 s_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(2,i) &
285 * s_legendre(j) - (j-1.0_xp) &
286 * s_legendre(j-1))/(real(j,xp))
287 t_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(3,i) &
288 * t_legendre(j) - (j-1.0_xp) &
289 * t_legendre(j-1))/(real(j,xp))
290 dr_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * r_legendre(j) &
291 + rst(1,i)*dr_legendre(j)
292 ds_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * s_legendre(j) &
293 + rst(2,i)*ds_legendre(j)
294 dt_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * t_legendre(j) &
295 + rst(3,i)*dt_legendre(j)
296 end do
297 e = (el_list(i))*this%Xh%lxyz + 1
298 ! Compute the current xyz value
299 call tnsr3d_el_cpu(resx(i), 1, this%x_hat%x(e), lx, &
300 r_legendre, s_legendre, t_legendre)
301 call tnsr3d_el_cpu(resy(i), 1, this%y_hat%x(e), lx, &
302 r_legendre, s_legendre, t_legendre)
303 call tnsr3d_el_cpu(resz(i), 1, this%z_hat%x(e), lx, &
304 r_legendre, s_legendre, t_legendre)
305 ! This should in principle be merged into some larger kernel
306 ! Compute the jacobian
307 call tnsr3d_el_cpu(jac(1,1), 1, this%x_hat%x(e), lx, &
308 dr_legendre, s_legendre(1), t_legendre)
309 call tnsr3d_el_cpu(jac(1,2), 1, this%y_hat%x(e), lx, &
310 dr_legendre, s_legendre(1), t_legendre)
311 call tnsr3d_el_cpu(jac(1,3), 1, this%z_hat%x(e), lx, &
312 dr_legendre, s_legendre, t_legendre)
313 call tnsr3d_el_cpu(jac(2,1), 1, this%x_hat%x(e), lx, &
314 r_legendre, ds_legendre, t_legendre)
315 call tnsr3d_el_cpu(jac(2,2), 1, this%y_hat%x(e), lx, &
316 r_legendre, ds_legendre, t_legendre)
317 call tnsr3d_el_cpu(jac(2,3), 1, this%z_hat%x(e), lx, &
318 r_legendre, ds_legendre, t_legendre)
319 call tnsr3d_el_cpu(jac(3,1), 1, this%x_hat%x(e), lx, &
320 r_legendre, s_legendre, dt_legendre)
321 call tnsr3d_el_cpu(jac(3,2), 1, this%y_hat%x(e), lx, &
322 r_legendre, s_legendre, dt_legendre)
323 call tnsr3d_el_cpu(jac(3,3), 1, this%z_hat%x(e), lx, &
324 r_legendre, s_legendre, dt_legendre)
325 resx(i) = pt_x(i) - resx(i)
326 resy(i) = pt_y(i) - resy(i)
327 resz(i) = pt_z(i) - resz(i)
328 ! Jacobian inverse
329 jacinv = matinv39(jac(1,1), jac(1,2), jac(1,3),&
330 jac(2,1), jac(2,2), jac(2,3),&
331 jac(3,1), jac(3,2), jac(3,3))
332 ! Update direction
333 rst_d(1) = (resx(i)*jacinv(1,1) &
334 + jacinv(2,1)*resy(i) &
335 + jacinv(3,1)*resz(i))
336 rst_d(2) = (resx(i)*jacinv(1,2) &
337 + jacinv(2,2)*resy(i) &
338 + jacinv(3,2)*resz(i))
339 rst_d(3) = (resx(i)*jacinv(1,3) &
340 + jacinv(2,3)*resy(i) &
341 + jacinv(3,3)*resz(i))
342
343 conv_pts = 0
344 if (norm2(real(rst_d,xp)) .le. this%tol) then
345 conv_pts = 1
346 end if
347 if (norm2(real(rst_d,xp)) .gt. 4.0) then
348 conv_pts = 1
349 end if
350 ! Update rst coordinates
351 rst(1,i) = rst(1,i) + rst_d(1)
352 rst(2,i) = rst(2,i) + rst_d(2)
353 rst(3,i) = rst(3,i) + rst_d(3)
354
355 converged = conv_pts .eq. 1
356 if (iter .ge. this%max_iter) converged = .true.
357 end do
358 end do
359 end subroutine find_rst_legendre_cpu
360end module legendre_rst_finder
double real
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Copy data between host and device (or device and device)
Definition device.F90:71
subroutine, public device_find_rst_legendre(rst_d, pt_x_d, pt_y_d, pt_z_d, x_hat_d, y_hat_d, z_hat_d, resx_d, resy_d, resz_d, lx, el_ids_d, n_pts, tol, conv_pts_d)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates on GPU.
subroutine legendre_rst_finder_init(this, x, y, z, nelv, xh, tol, max_iter)
subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, el_cands, n_point_cand, resx, resy, resz)
Given a set of element candidates containing the given points and computes the local RST coordinates ...
subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates.
subroutine legendre_rst_finder_free(this)
Definition math.f90:60
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1486
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
Defines a matrix.
Definition matrix.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
Tensor operations.
Definition tensor.f90:61
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product performed on nelv elements.
Definition tensor.f90:234
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Defines a vector.
Definition vector.f90:34
Type to compute local element (rst) coordinates for a gives set points in physical (xyz) space on a S...
The function space for the SEM solution fields.
Definition space.f90:63