57     real(kind=
rp), 
allocatable :: g11(:,:,:,:)
 
   59     real(kind=
rp), 
allocatable :: g22(:,:,:,:)
 
   61     real(kind=
rp), 
allocatable :: g33(:,:,:,:)
 
   63     real(kind=
rp), 
allocatable :: g12(:,:,:,:)
 
   65     real(kind=
rp), 
allocatable :: g13(:,:,:,:)
 
   67     real(kind=
rp), 
allocatable :: g23(:,:,:,:)
 
   69     real(kind=
rp), 
allocatable :: mult(:,:,:,:) 
 
   74     real(kind=
rp), 
allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
 
   75     real(kind=
rp), 
allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
 
   76     real(kind=
rp), 
allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
 
   80     real(kind=
rp), 
allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
 
   81     real(kind=
rp), 
allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
 
   82     real(kind=
rp), 
allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
 
   84     real(kind=
rp), 
allocatable :: h1(:,:,:,:) 
 
   85     real(kind=
rp), 
allocatable :: h2(:,:,:,:) 
 
   88     real(kind=
rp), 
allocatable :: jac(:,:,:,:) 
 
   89     real(kind=
rp), 
allocatable :: jacinv(:,:,:,:) 
 
   90     real(kind=
rp), 
allocatable :: b(:,:,:,:) 
 
   91     real(kind=
rp), 
allocatable :: binv(:,:,:,:) 
 
   93     real(kind=
rp), 
allocatable :: area(:,:,:,:) 
 
   94     real(kind=
rp), 
allocatable :: nx(:,:,:,:)   
 
   95     real(kind=
rp), 
allocatable :: ny(:,:,:,:)   
 
   96     real(kind=
rp), 
allocatable :: nz(:,:,:,:)   
 
   99     real(kind=
rp) :: volume
 
  104     type(
gs_t), 
pointer :: gs_h=> null()
 
  110     type(c_ptr) :: g11_d = c_null_ptr
 
  111     type(c_ptr) :: g22_d = c_null_ptr
 
  112     type(c_ptr) :: g33_d = c_null_ptr
 
  113     type(c_ptr) :: g12_d = c_null_ptr
 
  114     type(c_ptr) :: g13_d = c_null_ptr
 
  115     type(c_ptr) :: g23_d = c_null_ptr
 
  116     type(c_ptr) :: dxdr_d = c_null_ptr
 
  117     type(c_ptr) :: dydr_d = c_null_ptr
 
  118     type(c_ptr) :: dzdr_d = c_null_ptr
 
  119     type(c_ptr) :: dxds_d = c_null_ptr
 
  120     type(c_ptr) :: dyds_d = c_null_ptr
 
  121     type(c_ptr) :: dzds_d = c_null_ptr
 
  122     type(c_ptr) :: dxdt_d = c_null_ptr
 
  123     type(c_ptr) :: dydt_d = c_null_ptr
 
  124     type(c_ptr) :: dzdt_d = c_null_ptr
 
  125     type(c_ptr) :: drdx_d = c_null_ptr
 
  126     type(c_ptr) :: drdy_d = c_null_ptr
 
  127     type(c_ptr) :: drdz_d = c_null_ptr
 
  128     type(c_ptr) :: dsdx_d = c_null_ptr
 
  129     type(c_ptr) :: dsdy_d = c_null_ptr
 
  130     type(c_ptr) :: dsdz_d = c_null_ptr
 
  131     type(c_ptr) :: dtdx_d = c_null_ptr
 
  132     type(c_ptr) :: dtdy_d = c_null_ptr
 
  133     type(c_ptr) :: dtdz_d = c_null_ptr
 
  134     type(c_ptr) :: mult_d = c_null_ptr
 
  135     type(c_ptr) :: h1_d = c_null_ptr
 
  136     type(c_ptr) :: h2_d = c_null_ptr
 
  137     type(c_ptr) :: jac_d = c_null_ptr
 
  138     type(c_ptr) :: jacinv_d = c_null_ptr
 
  139     type(c_ptr) :: b_d = c_null_ptr
 
  140     type(c_ptr) :: binv_d = c_null_ptr
 
  141     type(c_ptr) :: area_d = c_null_ptr
 
  142     type(c_ptr) :: nx_d = c_null_ptr
 
  143     type(c_ptr) :: ny_d = c_null_ptr
 
  144     type(c_ptr) :: nz_d = c_null_ptr
 
  153     generic :: init => init_empty, init_all
 
 
  206    class(
coef_t), 
intent(inout) :: this
 
  207    type(
gs_t), 
intent(inout), 
target :: gs_h
 
  211    this%msh => gs_h%dofmap%msh
 
  212    this%Xh => gs_h%dofmap%Xh
 
  213    this%dof => gs_h%dofmap
 
  220    allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  221    allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  222    allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  223    allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  224    allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  225    allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  227    allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  228    allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  229    allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  231    allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  232    allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  233    allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  235    allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  236    allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  237    allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  239    allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  240    allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  241    allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  243    allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  244    allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  245    allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  247    allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  248    allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  249    allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  251    allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  252    allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  254    allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
 
  255    allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
 
  256    allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
 
  257    allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
 
  259    allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  260    allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  262    allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  263    allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  265    allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
  272    n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
 
  310       call device_map(this%jacinv, this%jacinv_d, n)
 
  314       m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
 
  354       call rone(this%mult, n)
 
 
  666    type(
coef_t), 
intent(inout) :: c
 
  667    integer :: e,i,lxy,lyz,ntot
 
  673    associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
 
  674         dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
 
  675         dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
 
  676         dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
 
  677         dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
 
  678         dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
 
  679         dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
 
  680         x => c%dof%x, y => c%dof%y, z => c%dof%z, &
 
  681         lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
 
  682         dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
 
  683         jacinv => c%jacinv, jac => c%jac)
 
  688              c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
 
  689              c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
 
  690              c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
 
  691              c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
 
  718            call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
 
  719            call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
 
  720            call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
 
  723               call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
 
  724               call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
 
  725               call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
 
  729            if(c%msh%gdim .eq. 3) 
then 
  730               call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
 
  731               call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
 
  732               call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
 
  734               call rzero(dxdt(1,1,1,e), lxy)
 
  735               call rzero(dydt(1,1,1,e), lxy)
 
  736               call rone(dzdt(1,1,1,e), lxy)
 
  740         if (c%msh%gdim .eq. 2) 
then 
  741            call rzero   (jac, ntot)
 
  742            call addcol3 (jac, dxdr, dyds, ntot)
 
  743            call subcol3 (jac, dxds, dydr, ntot)
 
  744            call copy    (drdx, dyds, ntot)
 
  745            call copy    (drdy, dxds, ntot)
 
  747            call copy    (dsdx, dydr, ntot)
 
  749            call copy    (dsdy, dxdr, ntot)
 
  750            call rzero   (drdz, ntot)
 
  751            call rzero   (dsdz, ntot)
 
  752            call rone    (dtdz, ntot)
 
  756               c%jac(i, 1, 1, 1) = 0.0_rp
 
  760               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1)  &
 
  761                                 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
 
  763               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1)  &
 
  764                                 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
 
  766               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1)  &
 
  767                                 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
 
  771               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1)  &
 
  772                                 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
 
  774               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1)  &
 
  775                                 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
 
  777               c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1)  &
 
  778                                 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
 
  782               c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
 
  783                                  - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
 
  785               c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
 
  786                                  - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
 
  788               c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
 
  789                                  - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
 
  793               c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
 
  794                                  - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
 
  796               c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
 
  797                                  - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
 
  799               c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
 
  800                                  - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
 
  804               c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
 
  805                                  - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
 
  807               c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
 
  808                                  - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
 
  810               c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
 
  811                                  - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
 
  815         call invers2(jacinv, jac, ntot)
 
 
  824    type(
coef_t), 
intent(inout) :: c
 
  825    integer :: e, i, lxyz, ntot
 
  827    lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
 
  830    if (neko_bcknd_device .eq. 1) 
then 
  832       call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
 
  833                                     c%G22_d, c%G23_d, c%G33_d, &
 
  834                                     c%drdx_d, c%drdy_d, c%drdz_d, &
 
  835                                     c%dsdx_d, c%dsdy_d, c%dsdz_d, &
 
  836                                     c%dtdx_d, c%dtdy_d, c%dtdz_d, &
 
  837                                     c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
 
  840       call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
 
  841       call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
 
  842       call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
 
  843       call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
 
  844       call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
 
  845       call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
 
  848       if(c%msh%gdim .eq. 2) 
then 
  851             c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
 
  852                               + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
 
  854             c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
 
  855                               + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
 
  857             c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
 
  858                               + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
 
  862             c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  863             c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  864             c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  865             c%G33(i, 1, 1, 1) = 0.0_rp
 
  866             c%G13(i, 1, 1, 1) = 0.0_rp
 
  867             c%G23(i, 1, 1, 1) = 0.0_rp
 
  870          do concurrent(e = 1:c%msh%nelv)
 
  871             do concurrent(i = 1:lxyz)
 
  872                c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
 
  873                c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
 
  874                c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
 
  881             c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
 
  882                               + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
 
  883                               + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
 
  885             c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
 
  886                               + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
 
  887                               + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
 
  889             c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
 
  890                               + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
 
  891                               + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
 
  895             c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  896             c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  897             c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  901             c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
 
  902                               + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
 
  903                               + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
 
  905             c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
 
  906                               + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
 
  907                               + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
 
  909             c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
 
  910                               + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
 
  911                               + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
 
  915             c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  916             c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  917             c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
 
  920          do concurrent(e = 1:c%msh%nelv)
 
  921             do concurrent(i = 1:lxyz)
 
  922                c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
 
  923                c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
 
  924                c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
 
  926                c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
 
  927                c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
 
  928                c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
 
 
 1017    type(
coef_t), 
intent(inout) :: coef
 
 1018    real(kind=rp), 
allocatable :: a(:,:,:,:)
 
 1019    real(kind=rp), 
allocatable :: b(:,:,:,:)
 
 1020    real(kind=rp), 
allocatable :: c(:,:,:,:)
 
 1021    real(kind=rp), 
allocatable :: dot(:,:,:,:)
 
 1022    integer :: n, e, i, j, k, lx
 
 1023    real(kind=rp) :: weight, len
 
 1027    allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
 
 1028    allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
 
 1029    allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
 
 1030    allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
 
 1034       a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
 
 1035                     - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
 
 1037       b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
 
 1038                     - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
 
 1040       c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
 
 1041                     - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
 
 1045       dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
 
 1046                       + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
 
 1047                       + c(i, 1, 1, 1) * c(i, 1, 1, 1)
 
 1050    do concurrent(e = 1:coef%msh%nelv)
 
 1051       do concurrent(k = 1:coef%Xh%lx)
 
 1052          do concurrent(j = 1:coef%Xh%lx)
 
 1053             weight = coef%Xh%wy(j) * coef%Xh%wz(k)
 
 1054             coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
 
 1055             coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
 
 1056             coef%nx(j,k, 1, e) = -a(1, j, k, e)
 
 1057             coef%nx(j,k, 2, e) =  a(lx, j, k, e)
 
 1058             coef%ny(j,k, 1, e) = -b(1, j, k, e)
 
 1059             coef%ny(j,k, 2, e) =  b(lx, j, k, e)
 
 1060             coef%nz(j,k, 1, e) = -c(1, j, k, e)
 
 1061             coef%nz(j,k, 2, e) =  c(lx, j, k, e)
 
 1068       a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
 
 1069                     - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
 
 1071       b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
 
 1072                     - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
 
 1074       c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
 
 1075                     - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
 
 1079       dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
 
 1080                       + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
 
 1081                       + c(i, 1, 1, 1) * c(i, 1, 1, 1)
 
 1084    do concurrent(e = 1:coef%msh%nelv)
 
 1085       do concurrent(k = 1:coef%Xh%lx)
 
 1086          do concurrent(j = 1:coef%Xh%lx)
 
 1087             weight = coef%Xh%wx(j) * coef%Xh%wz(k)
 
 1088             coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
 
 1089             coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
 
 1090             coef%nx(j,k, 3, e) =  a(j, 1, k, e)
 
 1091             coef%nx(j,k, 4, e) = -a(j, lx, k, e)
 
 1092             coef%ny(j,k, 3, e) =  b(j, 1, k, e)
 
 1093             coef%ny(j,k, 4, e) = -b(j, lx, k, e)
 
 1094             coef%nz(j,k, 3, e) =  c(j, 1, k, e)
 
 1095             coef%nz(j,k, 4, e) = -c(j, lx, k, e)
 
 1102       a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
 
 1103                     - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
 
 1105       b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
 
 1106                     - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
 
 1108       c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
 
 1109                     - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
 
 1113       dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
 
 1114                       + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
 
 1115                       + c(i, 1, 1, 1) * c(i, 1, 1, 1)
 
 1118    do concurrent(e = 1:coef%msh%nelv)
 
 1119       do concurrent(k = 1:coef%Xh%lx)
 
 1120          do concurrent(j = 1:coef%Xh%lx)
 
 1121             weight = coef%Xh%wx(j) * coef%Xh%wy(k)
 
 1122             coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
 
 1123             coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
 
 1124             coef%nx(j,k, 5, e) = -a(j, k, 1, e)
 
 1125             coef%nx(j,k, 6, e) =  a(j, k, lx, e)
 
 1126             coef%ny(j,k, 5, e) = -b(j, k, 1, e)
 
 1127             coef%ny(j,k, 6, e) =  b(j, k, lx, e)
 
 1128             coef%nz(j,k, 5, e) = -c(j, k, 1, e)
 
 1129             coef%nz(j,k, 6, e) =  c(j, k, lx, e)
 
 1135    do j = 1, 
size(coef%nz)
 
 1136       len = sqrt(coef%nx(j,1,1,1)**2 + &
 
 1137            coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
 
 1138       if (len .gt. neko_eps) 
then 
 1139          coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
 
 1140          coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
 
 1141          coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
 
 1150    if (neko_bcknd_device .eq. 1) 
then 
 1152       call device_memcpy(coef%area, coef%area_d, n, &
 
 1153                          host_to_device, sync=.false.)
 
 1154       call device_memcpy(coef%nx, coef%nx_d, n, &
 
 1155                          host_to_device, sync=.false.)
 
 1156       call device_memcpy(coef%ny, coef%ny_d, n, &
 
 1157                          host_to_device, sync=.false.)
 
 1158       call device_memcpy(coef%nz, coef%nz_d, n, &
 
 1159                          host_to_device, sync=.false.)