Neko 1.0.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
legendre_rst_finder.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2023, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
33
35 use num_types, only: rp, dp, xp, i8
38 use space, only: space_t
39 use utils, only: neko_error, neko_warning
40 use vector, only: vector_t
41 use matrix, only: matrix_t
42 use tensor, only: tnsr3d
43 use math, only: neko_eps, matinv39
46 use, intrinsic :: iso_c_binding, only: c_ptr, c_null_ptr, &
47 c_sizeof, c_loc, c_bool
50 use device_math, only: device_vlsc3
51 implicit none
52 private
53
56 type, public :: legendre_rst_finder_t
57 type(vector_t) :: x_hat, y_hat, z_hat
58 type(space_t), pointer :: xh => null()
59 integer :: nelv = 0
60 real(kind=rp) :: tol = neko_eps
61 integer :: max_iter = 10
62 contains
63 procedure, pass(this) :: init => legendre_rst_finder_init
64 procedure, pass(this) :: free => legendre_rst_finder_free
65 procedure, pass(this) :: find => legendre_rst_finder_find
67
68contains
69
70 subroutine legendre_rst_finder_init(this, x, y, z, nelv, Xh, tol, max_iter)
71 class(legendre_rst_finder_t), intent(inout) :: this
72 type(space_t), target, intent(in) :: Xh
73 integer, intent(in) :: nelv
74 real(kind=rp), intent(in), dimension(nelv*Xh%lxyz) :: x, y, z
75 real(kind=rp), intent(in), optional :: tol
76 integer, intent(in), optional :: max_iter
77
78 call this%free()
79
80 if (present(tol)) then
81 if (tol < neko_eps) then
82 call neko_error('Tolerance for the Legendre finder is too small')
83 end if
84 this%tol = tol
85
86 else
87 this%tol = neko_eps
88 end if
89 if (present(max_iter)) then
90 if (max_iter < 1) then
91 call neko_error('Max iterations for the Legendre finder is too small')
92 end if
93 this%max_iter = max_iter
94 else
95 this%max_iter = 10
96 end if
97
98 this%Xh => xh
99 this%nelv = nelv
100
101 call this%x_hat%init(nelv*xh%lxyz)
102 call this%y_hat%init(nelv*xh%lxyz)
103 call this%z_hat%init(nelv*xh%lxyz)
104
105 call tnsr3d_cpu(this%x_hat%x, xh%lx, x, &
106 xh%lx, xh%vinv, &
107 xh%vinvt, xh%vinvt, nelv)
108 call tnsr3d_cpu(this%y_hat%x, xh%lx, y, &
109 xh%lx, xh%vinv, &
110 xh%vinvt, xh%vinvt, nelv)
111 call tnsr3d_cpu(this%z_hat%x, xh%lx, z, &
112 xh%lx, xh%vinv, &
113 xh%vinvt, xh%vinvt, nelv)
114
116 call this%x_hat%copy_from(host_to_device,.false.)
117 call this%y_hat%copy_from(host_to_device,.false.)
118 call this%z_hat%copy_from(host_to_device,.false.)
119
120 end subroutine legendre_rst_finder_init
121
123 class(legendre_rst_finder_t), intent(inout) :: this
124
125 call this%x_hat%free()
126 call this%y_hat%free()
127 call this%z_hat%free()
128
129 this%Xh => null()
130
131 end subroutine legendre_rst_finder_free
132
148 subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, &
149 el_cands, n_point_cand, resx, resy, resz)
150 class(legendre_rst_finder_t), intent(inout) :: this
151 type(matrix_t), intent(inout) :: rst_local_cand
152 type(vector_t), intent(in) :: x_t, y_t, z_t
153 integer, intent(in) :: n_point_cand
154 integer, intent(inout) :: el_cands(:)
155 type(vector_t), intent(inout) :: resx, resy, resz
156 type(c_ptr) :: el_cands_d = c_null_ptr
157
159 if (n_point_cand .lt. 1) return
160
161 rst_local_cand = 0.0_rp
162
163 if (neko_bcknd_device .eq. 1) then
164 el_cands_d = device_get_ptr(el_cands)
165 call find_rst_legendre_device(this, rst_local_cand%x_d, &
166 x_t%x_d, y_t%x_d, z_t%x_d, &
167 el_cands_d, n_point_cand, &
168 resx%x_d, resy%x_d, resz%x_d)
169
170 else
171 call find_rst_legendre_cpu(this, rst_local_cand%x, x_t%x, y_t%x, z_t%x, &
172 el_cands, n_point_cand, &
173 resx%x, resy%x, resz%x)
174
175 end if
176
177 end subroutine legendre_rst_finder_find
178
185 subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, &
186 el_list, n_pts, resx, resy, resz)
187 type(legendre_rst_finder_t), intent(inout) :: this
188 type(c_ptr), intent(inout) :: rst
189 type(c_ptr), intent(in) :: pt_x, pt_y, pt_z
190 type(c_ptr), intent(in) :: el_list
191 type(c_ptr), intent(inout) :: resx, resy, resz
192 integer, intent(in) :: n_pts
193 type(vector_t) :: conv_pts
194 integer :: i, iter
195 logical :: converged
196 real(kind=rp) :: conv_sum
197
198 if (n_pts .eq. 0) return
199
200 call conv_pts%init(n_pts)
201
202 conv_pts = 1.0_rp
203
204 iter = 0
205 converged = .false.
206 !Iterate until found, not heavily optimized
207 do while (.not. converged)
208 iter = iter + 1
209 call device_find_rst_legendre(rst, pt_x, pt_y, pt_z, &
210 this%x_hat%x_d, this%y_hat%x_d, this%z_hat%x_d, &
211 resx, resy, resz, &
212 this%Xh%lx,el_list, n_pts, this%tol, &
213 conv_pts%x_d)
214 !This can be made more approriate... avoid memcpy at least
215 conv_sum = device_vlsc3(conv_pts%x_d,conv_pts%x_d,conv_pts%x_d,n_pts)
216 converged = conv_sum .lt. 0.5
217 !print *, conv_sum
218 if( iter .ge. this%max_iter) converged = .true.
219 end do
220
221 call conv_pts%free()
222 end subroutine find_rst_legendre_device
223
230 subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, &
231 el_list, n_pts, resx, resy, resz)
232 type(legendre_rst_finder_t), intent(inout) :: this
233 integer, intent(in) :: n_pts
234 real(kind=rp), intent(inout) :: rst(3, n_pts)
235 real(kind=rp), intent(in) :: pt_x(n_pts)
236 real(kind=rp), intent(in) :: pt_y(n_pts)
237 real(kind=rp), intent(in) :: pt_z(n_pts)
238 real(kind=rp), intent(inout) :: resx(n_pts)
239 real(kind=rp), intent(inout) :: resy(n_pts)
240 real(kind=rp), intent(inout) :: resz(n_pts)
241 integer, intent(in) :: el_list(n_pts)
242 real(kind=rp) :: r_legendre(this%Xh%lx)
243 real(kind=rp) :: s_legendre(this%Xh%lx)
244 real(kind=rp) :: t_legendre(this%Xh%lx)
245 real(kind=rp) :: dr_legendre(this%Xh%lx)
246 real(kind=rp) :: ds_legendre(this%Xh%lx)
247 real(kind=rp) :: dt_legendre(this%Xh%lx)
248 real(kind=rp) :: jac(3,3)
249 real(kind=xp) :: tmp(this%Xh%lx), tmp2(this%Xh%lx)
250 real(kind=xp) :: rst_d(3), jacinv(3,3)
251 integer :: conv_pts
252 logical :: converged
253 integer :: i, j, e, iter, lx, lx2
254
255
256
257 lx = this%Xh%lx
258 if (n_pts .lt. 1) return
259
260 rst = 0.0_rp
261 ! If performance critical we should do multiple points at the time
262 ! Currently we do one point at the time
263 do i = 1, n_pts
264 iter = 0
265 converged = .false.
266 do while (.not. converged)
267 iter = iter + 1
268 ! Compute legendre polynomials in this rst coordinate
269 r_legendre(1) = 1.0
270 r_legendre(2) = rst(1,i)
271 s_legendre(1) = 1.0
272 s_legendre(2) = rst(2,i)
273 t_legendre(1) = 1.0
274 t_legendre(2) = rst(3,i)
275 dr_legendre(1) = 0.0
276 dr_legendre(2) = 1.0
277 ds_legendre(1) = 0.0
278 ds_legendre(2) = 1.0
279 dt_legendre(1) = 0.0
280 dt_legendre(2) = 1.0
281 do j = 2, lx-1
282 r_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(1,i) &
283 * r_legendre(j) - (j-1.0_xp) &
284 * r_legendre(j-1)) / (real(j,xp))
285 s_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(2,i) &
286 * s_legendre(j) - (j-1.0_xp) &
287 * s_legendre(j-1))/(real(j,xp))
288 t_legendre(j+1) = ((2.0_xp*(j-1.0_xp)+1.0_xp) * rst(3,i) &
289 * t_legendre(j) - (j-1.0_xp) &
290 * t_legendre(j-1))/(real(j,xp))
291 dr_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * r_legendre(j) &
292 + rst(1,i)*dr_legendre(j)
293 ds_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * s_legendre(j) &
294 + rst(2,i)*ds_legendre(j)
295 dt_legendre(j+1) = ((j-1.0_xp)+1.0_xp) * t_legendre(j) &
296 + rst(3,i)*dt_legendre(j)
297 end do
298 e = (el_list(i))*this%Xh%lxyz + 1
299 ! Compute the current xyz value
300 call tnsr3d_el_cpu(resx(i), 1, this%x_hat%x(e), lx, &
301 r_legendre, s_legendre, t_legendre)
302 call tnsr3d_el_cpu(resy(i), 1, this%y_hat%x(e), lx, &
303 r_legendre, s_legendre, t_legendre)
304 call tnsr3d_el_cpu(resz(i), 1, this%z_hat%x(e), lx, &
305 r_legendre, s_legendre, t_legendre)
306 ! This should in principle be merged into some larger kernel
307 ! Compute the jacobian
308 call tnsr3d_el_cpu(jac(1,1), 1, this%x_hat%x(e), lx, &
309 dr_legendre, s_legendre(1), t_legendre)
310 call tnsr3d_el_cpu(jac(1,2), 1, this%y_hat%x(e), lx, &
311 dr_legendre, s_legendre(1), t_legendre)
312 call tnsr3d_el_cpu(jac(1,3), 1, this%z_hat%x(e), lx, &
313 dr_legendre, s_legendre, t_legendre)
314 call tnsr3d_el_cpu(jac(2,1), 1, this%x_hat%x(e), lx, &
315 r_legendre, ds_legendre, t_legendre)
316 call tnsr3d_el_cpu(jac(2,2), 1, this%y_hat%x(e), lx, &
317 r_legendre, ds_legendre, t_legendre)
318 call tnsr3d_el_cpu(jac(2,3), 1, this%z_hat%x(e), lx, &
319 r_legendre, ds_legendre, t_legendre)
320 call tnsr3d_el_cpu(jac(3,1), 1, this%x_hat%x(e), lx, &
321 r_legendre, s_legendre, dt_legendre)
322 call tnsr3d_el_cpu(jac(3,2), 1, this%y_hat%x(e), lx, &
323 r_legendre, s_legendre, dt_legendre)
324 call tnsr3d_el_cpu(jac(3,3), 1, this%z_hat%x(e), lx, &
325 r_legendre, s_legendre, dt_legendre)
326 resx(i) = pt_x(i) - resx(i)
327 resy(i) = pt_y(i) - resy(i)
328 resz(i) = pt_z(i) - resz(i)
329 ! Jacobian inverse
330 jacinv = matinv39(jac(1,1), jac(1,2), jac(1,3),&
331 jac(2,1), jac(2,2), jac(2,3),&
332 jac(3,1), jac(3,2), jac(3,3))
333 ! Update direction
334 rst_d(1) = (resx(i)*jacinv(1,1) &
335 + jacinv(2,1)*resy(i) &
336 + jacinv(3,1)*resz(i))
337 rst_d(2) = (resx(i)*jacinv(1,2) &
338 + jacinv(2,2)*resy(i) &
339 + jacinv(3,2)*resz(i))
340 rst_d(3) = (resx(i)*jacinv(1,3) &
341 + jacinv(2,3)*resy(i) &
342 + jacinv(3,3)*resz(i))
343
344 conv_pts = 0
345 if (norm2(real(rst_d,xp)) .le. this%tol) then
346 conv_pts = 1
347 end if
348 if (norm2(real(rst_d,xp)) .gt. 4.0) then
349 conv_pts = 1
350 end if
351 ! Update rst coordinates
352 rst(1,i) = rst(1,i) + rst_d(1)
353 rst(2,i) = rst(2,i) + rst_d(2)
354 rst(3,i) = rst(3,i) + rst_d(3)
355
356 converged = conv_pts .eq. 1
357 if (iter .ge. this%max_iter) converged = .true.
358 end do
359 end do
360 end subroutine find_rst_legendre_cpu
361end module legendre_rst_finder
double real
Return the device pointer for an associated Fortran array.
Definition device.F90:101
Copy data between host and device (or device and device)
Definition device.F90:71
subroutine, public device_find_rst_legendre(rst_d, pt_x_d, pt_y_d, pt_z_d, x_hat_d, y_hat_d, z_hat_d, resx_d, resy_d, resz_d, lx, el_ids_d, n_pts, tol, conv_pts_d)
real(kind=rp) function, public device_vlsc3(u_d, v_d, w_d, n, strm)
Compute multiplication sum .
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
subroutine, public device_alloc(x_d, s)
Allocate memory on the device.
Definition device.F90:192
type(c_ptr), bind(C), public glb_cmd_queue
Global command queue.
Definition device.F90:51
subroutine find_rst_legendre_device(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates on GPU.
subroutine legendre_rst_finder_init(this, x, y, z, nelv, xh, tol, max_iter)
subroutine legendre_rst_finder_find(this, rst_local_cand, x_t, y_t, z_t, el_cands, n_point_cand, resx, resy, resz)
Given a set of element candidates containing the given points and computes the local RST coordinates ...
subroutine find_rst_legendre_cpu(this, rst, pt_x, pt_y, pt_z, el_list, n_pts, resx, resy, resz)
Using the Legendre polynomials to find the rst coordinates.
subroutine legendre_rst_finder_free(this)
Definition math.f90:60
real(rp) function, dimension(3, 3), public matinv39(a11, a12, a13, a21, a22, a23, a31, a32, a33)
Definition math.f90:1486
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
Defines a matrix.
Definition matrix.f90:34
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public i8
Definition num_types.f90:7
integer, parameter, public xp
Definition num_types.f90:14
integer, parameter, public dp
Definition num_types.f90:9
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
subroutine, public tnsr3d_cpu(v, nv, u, nu, a, bt, ct, nelv)
subroutine, public tnsr3d_el_cpu(v, nv, u, nu, a, bt, ct)
Tensor operations.
Definition tensor.f90:61
subroutine, public tnsr3d(v, nv, u, nu, a, bt, ct, nelv)
Tensor product performed on nelv elements.
Definition tensor.f90:234
Utilities.
Definition utils.f90:35
subroutine, public neko_warning(warning_msg)
Reports a warning to standard output.
Definition utils.f90:346
Defines a vector.
Definition vector.f90:34
Type to compute local element (rst) coordinates for a gives set points in physical (xyz) space on a S...
The function space for the SEM solution fields.
Definition space.f90:63