Neko  0.8.99
A portable framework for high-order spectral element flow simulations
coef.f90
Go to the documentation of this file.
1 ! Copyright (c) 2020-2024, The Neko Authors
2 ! All rights reserved.
3 !
4 ! Redistribution and use in source and binary forms, with or without
5 ! modification, are permitted provided that the following conditions
6 ! are met:
7 !
8 ! * Redistributions of source code must retain the above copyright
9 ! notice, this list of conditions and the following disclaimer.
10 !
11 ! * Redistributions in binary form must reproduce the above
12 ! copyright notice, this list of conditions and the following
13 ! disclaimer in the documentation and/or other materials provided
14 ! with the distribution.
15 !
16 ! * Neither the name of the authors nor the names of its
17 ! contributors may be used to endorse or promote products derived
18 ! from this software without specific prior written permission.
19 !
20 ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 ! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 ! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 ! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 ! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 ! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 ! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 ! POSSIBILITY OF SUCH DAMAGE.
32 !
34 module coefs
35  use gather_scatter, only : gs_t
36  use gs_ops, only : gs_op_add
37  use neko_config, only : neko_bcknd_device
38  use num_types, only : rp
39  use dofmap, only : dofmap_t
40  use space, only: space_t
41  use math, only : rone, invcol1, addcol3, subcol3, copy, &
43  use mesh, only : mesh_t
47  use mxm_wrapper, only : mxm
48  use device
49  use, intrinsic :: iso_c_binding
50  implicit none
51  private
52 
55  type, public :: coef_t
57  real(kind=rp), allocatable :: g11(:,:,:,:)
59  real(kind=rp), allocatable :: g22(:,:,:,:)
61  real(kind=rp), allocatable :: g33(:,:,:,:)
63  real(kind=rp), allocatable :: g12(:,:,:,:)
65  real(kind=rp), allocatable :: g13(:,:,:,:)
67  real(kind=rp), allocatable :: g23(:,:,:,:)
68 
69  real(kind=rp), allocatable :: mult(:,:,:,:)
74  real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
75  real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
76  real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
80  real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
81  real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
82  real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
83 
84  real(kind=rp), allocatable :: h1(:,:,:,:)
85  real(kind=rp), allocatable :: h2(:,:,:,:)
86  logical :: ifh2
87 
88  real(kind=rp), allocatable :: jac(:,:,:,:)
89  real(kind=rp), allocatable :: jacinv(:,:,:,:)
90  real(kind=rp), allocatable :: b(:,:,:,:)
91  real(kind=rp), allocatable :: binv(:,:,:,:)
92 
93  real(kind=rp), allocatable :: area(:,:,:,:)
94  real(kind=rp), allocatable :: nx(:,:,:,:)
95  real(kind=rp), allocatable :: ny(:,:,:,:)
96  real(kind=rp), allocatable :: nz(:,:,:,:)
98 
99  real(kind=rp) :: volume
100 
101  type(space_t), pointer :: xh => null()
102  type(mesh_t), pointer :: msh => null()
103  type(dofmap_t), pointer :: dof => null()
104  type(gs_t), pointer :: gs_h=> null()
105 
106  !
107  ! Device pointers (if present)
108  !
109 
110  type(c_ptr) :: g11_d = c_null_ptr
111  type(c_ptr) :: g22_d = c_null_ptr
112  type(c_ptr) :: g33_d = c_null_ptr
113  type(c_ptr) :: g12_d = c_null_ptr
114  type(c_ptr) :: g13_d = c_null_ptr
115  type(c_ptr) :: g23_d = c_null_ptr
116  type(c_ptr) :: dxdr_d = c_null_ptr
117  type(c_ptr) :: dydr_d = c_null_ptr
118  type(c_ptr) :: dzdr_d = c_null_ptr
119  type(c_ptr) :: dxds_d = c_null_ptr
120  type(c_ptr) :: dyds_d = c_null_ptr
121  type(c_ptr) :: dzds_d = c_null_ptr
122  type(c_ptr) :: dxdt_d = c_null_ptr
123  type(c_ptr) :: dydt_d = c_null_ptr
124  type(c_ptr) :: dzdt_d = c_null_ptr
125  type(c_ptr) :: drdx_d = c_null_ptr
126  type(c_ptr) :: drdy_d = c_null_ptr
127  type(c_ptr) :: drdz_d = c_null_ptr
128  type(c_ptr) :: dsdx_d = c_null_ptr
129  type(c_ptr) :: dsdy_d = c_null_ptr
130  type(c_ptr) :: dsdz_d = c_null_ptr
131  type(c_ptr) :: dtdx_d = c_null_ptr
132  type(c_ptr) :: dtdy_d = c_null_ptr
133  type(c_ptr) :: dtdz_d = c_null_ptr
134  type(c_ptr) :: mult_d = c_null_ptr
135  type(c_ptr) :: h1_d = c_null_ptr
136  type(c_ptr) :: h2_d = c_null_ptr
137  type(c_ptr) :: jac_d = c_null_ptr
138  type(c_ptr) :: jacinv_d = c_null_ptr
139  type(c_ptr) :: b_d = c_null_ptr
140  type(c_ptr) :: binv_d = c_null_ptr
141  type(c_ptr) :: area_d = c_null_ptr
142  type(c_ptr) :: nx_d = c_null_ptr
143  type(c_ptr) :: ny_d = c_null_ptr
144  type(c_ptr) :: nz_d = c_null_ptr
145 
146 
147  contains
148  procedure, private, pass(this) :: init_empty => coef_init_empty
149  procedure, private, pass(this) :: init_all => coef_init_all
150  procedure, pass(this) :: free => coef_free
151  procedure, pass(this) :: get_normal => coef_get_normal
152  procedure, pass(this) :: get_area => coef_get_area
153  generic :: init => init_empty, init_all
154  end type coef_t
155 
156 contains
157 
159  subroutine coef_init_empty(this, Xh, msh)
160  class(coef_t), intent(inout) :: this
161  type(space_t), intent(inout), target :: Xh
162  type(mesh_t), intent(inout), target :: msh
163  integer :: n
164  call this%free()
165  this%msh => msh
166  this%Xh => xh
167 
168  allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
169  allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
170  allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
171 
172  allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
173  allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
174  allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
175 
176  allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
177  allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
178  allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
179 
180 
181  !
182  ! Setup device memory (if present)
183  !
184 
185  n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
186  if (neko_bcknd_device .eq. 1) then
187 
188  call device_map(this%drdx, this%drdx_d, n)
189  call device_map(this%drdy, this%drdy_d, n)
190  call device_map(this%drdz, this%drdz_d, n)
191 
192  call device_map(this%dsdx, this%dsdx_d, n)
193  call device_map(this%dsdy, this%dsdy_d, n)
194  call device_map(this%dsdz, this%dsdz_d, n)
195 
196  call device_map(this%dtdx, this%dtdx_d, n)
197  call device_map(this%dtdy, this%dtdy_d, n)
198  call device_map(this%dtdz, this%dtdz_d, n)
199 
200  end if
201 
202  end subroutine coef_init_empty
203 
205  subroutine coef_init_all(this, gs_h)
206  class(coef_t), intent(inout) :: this
207  type(gs_t), intent(inout), target :: gs_h
208  integer :: n, m
209  call this%free()
210 
211  this%msh => gs_h%dofmap%msh
212  this%Xh => gs_h%dofmap%Xh
213  this%dof => gs_h%dofmap
214  this%gs_h => gs_h
215 
216  !
217  ! Allocate arrays for geometric data
218  !
220  allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
221  allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
222  allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
223  allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
224  allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
225  allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
226 
227  allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
228  allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
229  allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
230 
231  allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
232  allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
233  allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
234 
235  allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236  allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237  allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238 
239  allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240  allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241  allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
242 
243  allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244  allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245  allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
246 
247  allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248  allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249  allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250 
251  allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252  allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253 
254  allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
255  allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
256  allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
257  allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
258 
259  allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260  allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261 
262  allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263  allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 
265  allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
266 
267 
268  !
269  ! Setup device memory (if present)
270  !
271 
272  n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
273  if (neko_bcknd_device .eq. 1) then
274  call device_map(this%G11, this%G11_d, n)
275  call device_map(this%G22, this%G22_d, n)
276  call device_map(this%G33, this%G33_d, n)
277  call device_map(this%G12, this%G12_d, n)
278  call device_map(this%G13, this%G13_d, n)
279  call device_map(this%G23, this%G23_d, n)
280 
281  call device_map(this%dxdr, this%dxdr_d, n)
282  call device_map(this%dydr, this%dydr_d, n)
283  call device_map(this%dzdr, this%dzdr_d, n)
284 
285  call device_map(this%dxds, this%dxds_d, n)
286  call device_map(this%dyds, this%dyds_d, n)
287  call device_map(this%dzds, this%dzds_d, n)
288 
289  call device_map(this%dxdt, this%dxdt_d, n)
290  call device_map(this%dydt, this%dydt_d, n)
291  call device_map(this%dzdt, this%dzdt_d, n)
292 
293  call device_map(this%drdx, this%drdx_d, n)
294  call device_map(this%drdy, this%drdy_d, n)
295  call device_map(this%drdz, this%drdz_d, n)
296 
297  call device_map(this%dsdx, this%dsdx_d, n)
298  call device_map(this%dsdy, this%dsdy_d, n)
299  call device_map(this%dsdz, this%dsdz_d, n)
300 
301  call device_map(this%dtdx, this%dtdx_d, n)
302  call device_map(this%dtdy, this%dtdy_d, n)
303  call device_map(this%dtdz, this%dtdz_d, n)
304 
305  call device_map(this%mult, this%mult_d, n)
306  call device_map(this%h1, this%h1_d, n)
307  call device_map(this%h2, this%h2_d, n)
308 
309  call device_map(this%jac, this%jac_d, n)
310  call device_map(this%jacinv, this%jacinv_d, n)
311  call device_map(this%B, this%B_d, n)
312  call device_map(this%Binv, this%Binv_d, n)
313 
314  m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
315 
316  call device_map(this%area, this%area_d, m)
317  call device_map(this%nx, this%nx_d, m)
318  call device_map(this%ny, this%ny_d, m)
319  call device_map(this%nz, this%nz_d, m)
320 
321  end if
322 
323  call coef_generate_dxyzdrst(this)
324 
325  call coef_generate_geo(this)
326 
328 
329  call coef_generate_mass(this)
330 
331 
332  ! This is a placeholder, just for now
333  ! We can probably find a prettier solution
334  if (neko_bcknd_device .eq. 1) then
335  call device_rone(this%h1_d, n)
336  call device_rone(this%h2_d, n)
337  call device_memcpy(this%h1, this%h1_d, n, &
338  device_to_host, sync=.false.)
339  call device_memcpy(this%h2, this%h2_d, n, &
340  device_to_host, sync=.false.)
341  else
342  call rone(this%h1,n)
343  call rone(this%h2,n)
344  end if
345 
346  this%ifh2 = .false.
347 
348  !
349  ! Set up multiplicity
350  !
351  if (neko_bcknd_device .eq. 1) then
352  call device_rone(this%mult_d, n)
353  else
354  call rone(this%mult, n)
355  end if
356 
357  call gs_h%op(this%mult, n, gs_op_add)
358 
359  if (neko_bcknd_device .eq. 1) then
360  call device_invcol1(this%mult_d, n)
361  call device_memcpy(this%mult, this%mult_d, n, &
362  device_to_host, sync=.true.)
363  else
364  call invcol1(this%mult, n)
365  end if
366 
367  end subroutine coef_init_all
368 
370  subroutine coef_free(this)
371  class(coef_t), intent(inout) :: this
372 
373  if (allocated(this%G11)) then
374  deallocate(this%G11)
375  end if
376 
377  if (allocated(this%G22)) then
378  deallocate(this%G22)
379  end if
380 
381  if (allocated(this%G33)) then
382  deallocate(this%G33)
383  end if
384 
385  if (allocated(this%G12)) then
386  deallocate(this%G12)
387  end if
388 
389  if (allocated(this%G13)) then
390  deallocate(this%G13)
391  end if
392 
393  if (allocated(this%G23)) then
394  deallocate(this%G23)
395  end if
396 
397  if (allocated(this%mult)) then
398  deallocate(this%mult)
399  end if
400 
401  if (allocated(this%B)) then
402  deallocate(this%B)
403  end if
404 
405  if (allocated(this%Binv)) then
406  deallocate(this%Binv)
407  end if
408 
409  if(allocated(this%dxdr)) then
410  deallocate(this%dxdr)
411  end if
412 
413  if(allocated(this%dxds)) then
414  deallocate(this%dxds)
415  end if
416 
417  if(allocated(this%dxdt)) then
418  deallocate(this%dxdt)
419  end if
420 
421  if(allocated(this%dydr)) then
422  deallocate(this%dydr)
423  end if
424 
425  if(allocated(this%dyds)) then
426  deallocate(this%dyds)
427  end if
428 
429  if(allocated(this%dydt)) then
430  deallocate(this%dydt)
431  end if
432 
433  if(allocated(this%dzdr)) then
434  deallocate(this%dzdr)
435  end if
436 
437  if(allocated(this%dzds)) then
438  deallocate(this%dzds)
439  end if
440 
441  if(allocated(this%dzdt)) then
442  deallocate(this%dzdt)
443  end if
444 
445  if(allocated(this%drdx)) then
446  deallocate(this%drdx)
447  end if
448 
449  if(allocated(this%dsdx)) then
450  deallocate(this%dsdx)
451  end if
452 
453  if(allocated(this%dtdx)) then
454  deallocate(this%dtdx)
455  end if
456 
457  if(allocated(this%drdy)) then
458  deallocate(this%drdy)
459  end if
460 
461  if(allocated(this%dsdy)) then
462  deallocate(this%dsdy)
463  end if
464 
465  if(allocated(this%dtdy)) then
466  deallocate(this%dtdy)
467  end if
468 
469  if(allocated(this%drdz)) then
470  deallocate(this%drdz)
471  end if
472 
473  if(allocated(this%dsdz)) then
474  deallocate(this%dsdz)
475  end if
476 
477  if(allocated(this%dtdz)) then
478  deallocate(this%dtdz)
479  end if
480 
481  if(allocated(this%jac)) then
482  deallocate(this%jac)
483  end if
484 
485  if(allocated(this%jacinv)) then
486  deallocate(this%jacinv)
487  end if
488 
489  if(allocated(this%h1)) then
490  deallocate(this%h1)
491  end if
492 
493  if(allocated(this%h2)) then
494  deallocate(this%h2)
495  end if
496 
497  if (allocated(this%area)) then
498  deallocate(this%area)
499  end if
500 
501  if (allocated(this%nx)) then
502  deallocate(this%nx)
503  end if
504 
505  if (allocated(this%ny)) then
506  deallocate(this%ny)
507  end if
508 
509  if (allocated(this%nz)) then
510  deallocate(this%nz)
511  end if
512 
513 
514  nullify(this%msh)
515  nullify(this%Xh)
516  nullify(this%dof)
517 
518  !
519  ! Cleanup the device (if present)
520  !
521 
522  if (c_associated(this%G11_d)) then
523  call device_free(this%G11_d)
524  end if
525 
526  if (c_associated(this%G22_d)) then
527  call device_free(this%G22_d)
528  end if
529 
530  if (c_associated(this%G33_d)) then
531  call device_free(this%G33_d)
532  end if
533 
534  if (c_associated(this%G12_d)) then
535  call device_free(this%G12_d)
536  end if
537 
538  if (c_associated(this%G13_d)) then
539  call device_free(this%G13_d)
540  end if
541 
542  if (c_associated(this%G23_d)) then
543  call device_free(this%G23_d)
544  end if
545 
546  if (c_associated(this%dxdr_d)) then
547  call device_free(this%dxdr_d)
548  end if
549 
550  if (c_associated(this%dydr_d)) then
551  call device_free(this%dydr_d)
552  end if
553 
554  if (c_associated(this%dzdr_d)) then
555  call device_free(this%dzdr_d)
556  end if
557 
558  if (c_associated(this%dxds_d)) then
559  call device_free(this%dxds_d)
560  end if
561 
562  if (c_associated(this%dyds_d)) then
563  call device_free(this%dyds_d)
564  end if
565 
566  if (c_associated(this%dzds_d)) then
567  call device_free(this%dzds_d)
568  end if
569 
570  if (c_associated(this%dxdt_d)) then
571  call device_free(this%dxdt_d)
572  end if
573 
574  if (c_associated(this%dydt_d)) then
575  call device_free(this%dydt_d)
576  end if
577 
578  if (c_associated(this%dzdt_d)) then
579  call device_free(this%dzdt_d)
580  end if
581 
582  if (c_associated(this%drdx_d)) then
583  call device_free(this%drdx_d)
584  end if
585 
586  if (c_associated(this%drdy_d)) then
587  call device_free(this%drdy_d)
588  end if
589 
590  if (c_associated(this%drdz_d)) then
591  call device_free(this%drdz_d)
592  end if
593 
594  if (c_associated(this%dsdx_d)) then
595  call device_free(this%dsdx_d)
596  end if
597 
598  if (c_associated(this%dsdy_d)) then
599  call device_free(this%dsdy_d)
600  end if
601 
602  if (c_associated(this%dsdz_d)) then
603  call device_free(this%dsdz_d)
604  end if
605 
606  if (c_associated(this%dtdx_d)) then
607  call device_free(this%dtdx_d)
608  end if
609 
610  if (c_associated(this%dtdy_d)) then
611  call device_free(this%dtdy_d)
612  end if
613 
614  if (c_associated(this%dtdz_d)) then
615  call device_free(this%dtdz_d)
616  end if
617 
618  if (c_associated(this%mult_d)) then
619  call device_free(this%mult_d)
620  end if
621 
622  if (c_associated(this%h1_d)) then
623  call device_free(this%h1_d)
624  end if
625 
626  if (c_associated(this%h2_d)) then
627  call device_free(this%h2_d)
628  end if
629 
630  if (c_associated(this%jac_d)) then
631  call device_free(this%jac_d)
632  end if
633 
634  if (c_associated(this%jacinv_d)) then
635  call device_free(this%jacinv_d)
636  end if
637 
638  if (c_associated(this%B_d)) then
639  call device_free(this%B_d)
640  end if
641 
642  if (c_associated(this%Binv_d)) then
643  call device_free(this%Binv_d)
644  end if
645 
646  if (c_associated(this%area_d)) then
647  call device_free(this%area_d)
648  end if
649 
650  if (c_associated(this%nx_d)) then
651  call device_free(this%nx_d)
652  end if
653 
654  if (c_associated(this%ny_d)) then
655  call device_free(this%ny_d)
656  end if
657 
658  if (c_associated(this%nz_d)) then
659  call device_free(this%nz_d)
660  end if
661 
662 
663  end subroutine coef_free
664 
666  type(coef_t), intent(inout) :: c
667  integer :: e,i,lxy,lyz,ntot
668 
669  lxy=c%Xh%lx*c%Xh%ly
670  lyz=c%Xh%ly*c%Xh%lz
671  ntot = c%dof%size()
672 
673  associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
674  dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
675  dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
676  dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
677  dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
678  dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
679  dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
680  x => c%dof%x, y => c%dof%y, z => c%dof%z, &
681  lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
682  dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
683  jacinv => c%jacinv, jac => c%jac)
684 
685  if (neko_bcknd_device .eq. 1) then
686 
687  call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
688  c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
689  c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
690  c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
691  c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
692  c%Xh%lx, c%msh%nelv)
693 
694  call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
695  call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
696  call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
697  call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
698  call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
699  call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
700  call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
701  call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
702  call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
703  call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
704  call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
705  call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
706  call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
707  call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
708  call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
709  call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
710  call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
711  call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
712  call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
713  call device_memcpy(jacinv, c%jacinv_d, ntot, &
714  device_to_host, sync=.true.)
715 
716  else
717  do e = 1, c%msh%nelv
718  call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
719  call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
720  call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
721 
722  do i = 1, lz
723  call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
724  call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
725  call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
726  end do
727 
728  ! We actually take 2d into account, wow, need to do that for the rest.
729  if(c%msh%gdim .eq. 3) then
730  call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
731  call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
732  call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
733  else
734  call rzero(dxdt(1,1,1,e), lxy)
735  call rzero(dydt(1,1,1,e), lxy)
736  call rone(dzdt(1,1,1,e), lxy)
737  end if
738  end do
739 
740  if (c%msh%gdim .eq. 2) then
741  call rzero (jac, ntot)
742  call addcol3 (jac, dxdr, dyds, ntot)
743  call subcol3 (jac, dxds, dydr, ntot)
744  call copy (drdx, dyds, ntot)
745  call copy (drdy, dxds, ntot)
746  call chsign (drdy, ntot)
747  call copy (dsdx, dydr, ntot)
748  call chsign (dsdx, ntot)
749  call copy (dsdy, dxdr, ntot)
750  call rzero (drdz, ntot)
751  call rzero (dsdz, ntot)
752  call rone (dtdz, ntot)
753  else
754 
755  do i = 1, ntot
756  c%jac(i, 1, 1, 1) = 0.0_rp
757  end do
758 
759  do i = 1, ntot
760  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
761  * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
762 
763  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
764  * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
765 
766  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
767  * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
768  end do
769 
770  do i = 1, ntot
771  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
772  * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
773 
774  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
775  * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
776 
777  c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
778  * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
779  end do
780 
781  do i = 1, ntot
782  c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
783  - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
784 
785  c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
786  - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
787 
788  c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
789  - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
790  end do
791 
792  do i = 1, ntot
793  c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
794  - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
795 
796  c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
797  - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
798 
799  c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
800  - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
801  end do
802 
803  do i = 1, ntot
804  c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
805  - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
806 
807  c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
808  - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
809 
810  c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
811  - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
812  end do
813 
814  end if
815  call invers2(jacinv, jac, ntot)
816  end if
817  end associate
818 
819  end subroutine coef_generate_dxyzdrst
820 
823  subroutine coef_generate_geo(c)
824  type(coef_t), intent(inout) :: c
825  integer :: e, i, lxyz, ntot
826 
827  lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
828  ntot = c%dof%size()
829 
830  if (neko_bcknd_device .eq. 1) then
831 
832  call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
833  c%G22_d, c%G23_d, c%G33_d, &
834  c%drdx_d, c%drdy_d, c%drdz_d, &
835  c%dsdx_d, c%dsdy_d, c%dsdz_d, &
836  c%dtdx_d, c%dtdy_d, c%dtdz_d, &
837  c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
838  c%Xh%lx, c%msh%gdim)
839 
840  call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
841  call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
842  call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
843  call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
844  call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
845  call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
846 
847  else
848  if(c%msh%gdim .eq. 2) then
849 
850  do i = 1, ntot
851  c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
852  + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
853 
854  c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
855  + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
856 
857  c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
858  + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
859  end do
860 
861  do i = 1, ntot
862  c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
863  c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
864  c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
865  c%G33(i, 1, 1, 1) = 0.0_rp
866  c%G13(i, 1, 1, 1) = 0.0_rp
867  c%G23(i, 1, 1, 1) = 0.0_rp
868  end do
869 
870  do concurrent(e = 1:c%msh%nelv)
871  do concurrent(i = 1:lxyz)
872  c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
873  c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
874  c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
875  end do
876  end do
877 
878  else
879 
880  do i = 1, ntot
881  c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
882  + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
883  + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
884 
885  c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
886  + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
887  + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
888 
889  c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
890  + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
891  + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
892  end do
893 
894  do i = 1, ntot
895  c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
896  c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
897  c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
898  end do
899 
900  do i = 1, ntot
901  c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
902  + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
903  + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
904 
905  c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
906  + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
907  + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
908 
909  c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
910  + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
911  + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
912  end do
913 
914  do i = 1, ntot
915  c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
916  c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
917  c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
918  end do
919 
920  do concurrent(e = 1:c%msh%nelv)
921  do concurrent(i = 1:lxyz)
922  c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
923  c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
924  c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
925 
926  c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
927  c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
928  c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
929  end do
930  end do
931 
932  end if
933  end if
934 
935  end subroutine coef_generate_geo
936 
939  subroutine coef_generate_mass(c)
940  type(coef_t), intent(inout) :: c
941  integer :: e, i, lxyz, ntot
942 
943  lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
944  ntot = c%dof%size()
945 
947  do concurrent(e = 1:c%msh%nelv)
948  ! Here we need to handle things differently for axis symmetric elements
949  do concurrent(i = 1:lxyz)
950  c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
951  c%Binv(i,1,1,e) = c%B(i,1,1,e)
952  end do
953  end do
954 
955  if (neko_bcknd_device .eq. 1) then
956  call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
957  call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
958  end if
959 
960  call c%gs_h%op(c%Binv, ntot, gs_op_add)
961 
962  if (neko_bcknd_device .eq. 1) then
963  call device_invcol1(c%Binv_d, ntot)
964  call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
965  else
966  call invcol1(c%Binv, ntot)
967  end if
968 
970  if (neko_bcknd_device .eq. 1) then
971  c%volume = device_glsum(c%B_d, ntot)
972  else
973  c%volume = glsum(c%B, ntot)
974  end if
975 
976  end subroutine coef_generate_mass
977 
978  pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
979  class(coef_t), intent(in) :: this
980  integer, intent(in) :: i, j, k, e, facet
981  real(kind=rp) :: normal(3)
982 
983  select case (facet)
984  case(1,2)
985  normal(1) = this%nx(j, k, facet, e)
986  normal(2) = this%ny(j, k, facet, e)
987  normal(3) = this%nz(j, k, facet, e)
988  case(3,4)
989  normal(1) = this%nx(i, k, facet, e)
990  normal(2) = this%ny(i, k, facet, e)
991  normal(3) = this%nz(i, k, facet, e)
992  case(5,6)
993  normal(1) = this%nx(i, j, facet, e)
994  normal(2) = this%ny(i, j, facet, e)
995  normal(3) = this%nz(i, j, facet, e)
996  end select
997  end function coef_get_normal
998 
999  pure function coef_get_area(this, i, j, k, e, facet) result(area)
1000  class(coef_t), intent(in) :: this
1001  integer, intent(in) :: i, j, k, e, facet
1002  real(kind=rp) :: area
1003 
1004  select case (facet)
1005  case(1,2)
1006  area = this%area(j, k, facet, e)
1007  case(3,4)
1008  area = this%area(i, k, facet, e)
1009  case(5,6)
1010  area = this%area(i, j, facet, e)
1011  end select
1012  end function coef_get_area
1013 
1014 
1017  type(coef_t), intent(inout) :: coef
1018  real(kind=rp), allocatable :: a(:,:,:,:)
1019  real(kind=rp), allocatable :: b(:,:,:,:)
1020  real(kind=rp), allocatable :: c(:,:,:,:)
1021  real(kind=rp), allocatable :: dot(:,:,:,:)
1022  integer :: n, e, i, j, k, lx
1023  real(kind=rp) :: weight, len
1024  n = coef%dof%size()
1025  lx = coef%Xh%lx
1026 
1027  allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1028  allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1029  allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1030  allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1031 
1032  ! ds x dt
1033  do i = 1, n
1034  a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1035  - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1036 
1037  b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1038  - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1039 
1040  c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1041  - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1042  end do
1043 
1044  do i = 1, n
1045  dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1046  + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1047  + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1048  end do
1049 
1050  do concurrent(e = 1:coef%msh%nelv)
1051  do concurrent(k = 1:coef%Xh%lx)
1052  do concurrent(j = 1:coef%Xh%lx)
1053  weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1054  coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1055  coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1056  coef%nx(j,k, 1, e) = -a(1, j, k, e)
1057  coef%nx(j,k, 2, e) = a(lx, j, k, e)
1058  coef%ny(j,k, 1, e) = -b(1, j, k, e)
1059  coef%ny(j,k, 2, e) = b(lx, j, k, e)
1060  coef%nz(j,k, 1, e) = -c(1, j, k, e)
1061  coef%nz(j,k, 2, e) = c(lx, j, k, e)
1062  end do
1063  end do
1064  end do
1065 
1066  ! dr x dt
1067  do i = 1, n
1068  a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1069  - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1070 
1071  b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1072  - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1073 
1074  c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1075  - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1076  end do
1077 
1078  do i = 1, n
1079  dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1080  + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1081  + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1082  end do
1083 
1084  do concurrent(e = 1:coef%msh%nelv)
1085  do concurrent(k = 1:coef%Xh%lx)
1086  do concurrent(j = 1:coef%Xh%lx)
1087  weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1088  coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1089  coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1090  coef%nx(j,k, 3, e) = a(j, 1, k, e)
1091  coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1092  coef%ny(j,k, 3, e) = b(j, 1, k, e)
1093  coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1094  coef%nz(j,k, 3, e) = c(j, 1, k, e)
1095  coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1096  end do
1097  end do
1098  end do
1099 
1100  ! dr x ds
1101  do i = 1, n
1102  a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1103  - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1104 
1105  b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1106  - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1107 
1108  c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1109  - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1110  end do
1111 
1112  do i = 1, n
1113  dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1114  + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1115  + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1116  end do
1117 
1118  do concurrent(e = 1:coef%msh%nelv)
1119  do concurrent(k = 1:coef%Xh%lx)
1120  do concurrent(j = 1:coef%Xh%lx)
1121  weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1122  coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1123  coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1124  coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1125  coef%nx(j,k, 6, e) = a(j, k, lx, e)
1126  coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1127  coef%ny(j,k, 6, e) = b(j, k, lx, e)
1128  coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1129  coef%nz(j,k, 6, e) = c(j, k, lx, e)
1130  end do
1131  end do
1132  end do
1133 
1134  ! Normalize
1135  do j = 1, size(coef%nz)
1136  len = sqrt(coef%nx(j,1,1,1)**2 + &
1137  coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1138  if (len .gt. neko_eps) then
1139  coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1140  coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1141  coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1142  end if
1143  end do
1144 
1145  deallocate(dot)
1146  deallocate(c)
1147  deallocate(b)
1148  deallocate(a)
1150  if (neko_bcknd_device .eq. 1) then
1151  n = size(coef%area)
1152  call device_memcpy(coef%area, coef%area_d, n, &
1153  host_to_device, sync=.false.)
1154  call device_memcpy(coef%nx, coef%nx_d, n, &
1155  host_to_device, sync=.false.)
1156  call device_memcpy(coef%ny, coef%ny_d, n, &
1157  host_to_device, sync=.false.)
1158  call device_memcpy(coef%nz, coef%nz_d, n, &
1159  host_to_device, sync=.false.)
1160  end if
1161 
1162  end subroutine coef_generate_area_and_normal
1163 
1164 
1165 end module coefs
Map a Fortran array to a device (allocate and associate)
Definition: device.F90:57
Copy data between host and device (or device and device)
Definition: device.F90:51
Coefficients.
Definition: coef.f90:34
subroutine coef_init_empty(this, Xh, msh)
Initialize empty coefs for a space and a mesh.
Definition: coef.f90:160
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition: coef.f90:824
subroutine coef_free(this)
Deallocate coefficients.
Definition: coef.f90:371
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition: coef.f90:1000
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition: coef.f90:979
subroutine coef_generate_dxyzdrst(c)
Definition: coef.f90:666
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition: coef.f90:1017
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition: coef.f90:206
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition: coef.f90:940
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(G11_d, G12_d, G13_d, G22_d, G23_d, G33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_invcol1(a_d, n)
Invert a vector .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
Device abstraction, common interface for various accelerators.
Definition: device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition: device.F90:185
integer, parameter, public device_to_host
Definition: device.F90:47
Defines a mapping of the degrees of freedom.
Definition: dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition: gs_ops.f90:34
integer, parameter, public gs_op_add
Definition: gs_ops.f90:36
Definition: math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition: math.f90:460
subroutine, public subcol3(a, b, c, n)
Returns .
Definition: math.f90:716
subroutine, public rone(a, n)
Set all elements to one.
Definition: math.f90:217
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition: math.f90:326
subroutine, public addcol3(a, b, c, n)
Returns .
Definition: math.f90:761
subroutine, public invcol1(a, n)
Invert a vector .
Definition: math.f90:435
subroutine, public chsign(a, n)
Change sign of vector .
Definition: math.f90:400
subroutine, public copy(a, b, n)
Copy a vector .
Definition: math.f90:228
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition: math.f90:67
subroutine, public rzero(a, n)
Zero a real vector.
Definition: math.f90:184
Defines a mesh.
Definition: mesh.f90:34
Wrapper for all matrix-matrix product implementations.
Definition: mxm_wrapper.F90:2
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Definition: mxm_wrapper.F90:29
Build configurations.
Definition: neko_config.f90:34
integer, parameter neko_bcknd_device
Definition: neko_config.f90:44
integer, parameter, public rp
Global precision used in computations.
Definition: num_types.f90:12
Defines a function space.
Definition: space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition: coef.f90:55
The function space for the SEM solution fields.
Definition: space.f90:62