Neko
0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
projection.hip
Go to the documentation of this file.
1
/*
2
Copyright (c) 2022, The Neko Authors
3
All rights reserved.
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions
7
are met:
8
9
* Redistributions of source code must retain the above copyright
10
notice, this list of conditions and the following disclaimer.
11
12
* Redistributions in binary form must reproduce the above
13
copyright notice, this list of conditions and the following
14
disclaimer in the documentation and/or other materials provided
15
with the distribution.
16
17
* Neither the name of the authors nor the names of its
18
contributors may be used to endorse or promote products derived
19
from this software without specific prior written permission.
20
21
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
POSSIBILITY OF SUCH DAMAGE.
33
*/
34
35
#include <hip/hip_runtime.h>
36
#include <
device/device_config.h
>
37
#include <
device/hip/check.h
>
38
39
#include "
projection_kernel.h
"
40
#include <
math/bcknd/device/hip/math_kernel.h
>
41
42
43
/*
44
* Reduction buffer
45
*/
46
int
proj_red_s
= 0;
47
real
*
proj_bufred_d
=
NULL
;
48
49
extern
"C"
{
50
51
#include <
math/bcknd/device/device_mpi_reduce.h
>
52
#include <
math/bcknd/device/device_mpi_op.h
>
53
54
void
hip_project_on
(
void
*alpha,
void
* b,
void
*xx,
void
*bb,
void
*mult,
55
void
*xbar,
int
*
j
,
int
*n){
56
57
int
pow2
= 1;
58
while
(
pow2
< (*
j
)){
59
pow2
= 2*
pow2
;
60
}
61
const
int
nt = 1024/
pow2
;
62
const
dim3
glsc3_nthrds
(
pow2
, nt, 1);
63
const
dim3
glsc3_nblcks
(((*n)+nt - 1)/nt, 1, 1);
64
const
int
glsc3_nb
= ((*n) + nt - 1)/nt;
65
if
((*
j
)*
glsc3_nb
>
proj_red_s
){
66
proj_red_s
= (*j)*
glsc3_nb
;
67
if
(
proj_bufred_d
!=
NULL
) {
68
HIP_CHECK
(
hipFree
(
proj_bufred_d
));
69
}
70
HIP_CHECK
(
hipMalloc
(&
proj_bufred_d
, (*
j
)*
glsc3_nb
*
sizeof
(
real
)));
71
}
72
73
/* First glsc3_many call */
74
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_many_kernel<real>
),
75
glsc3_nblcks
,
glsc3_nthrds
,
76
0, (
hipStream_t
) glb_cmd_queue,
77
(
const
real
*) b, (
const
real
**) xx,
78
(
const
real
*) mult,
proj_bufred_d
, *
j
, *n);
79
HIP_CHECK
(
hipGetLastError
());
80
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_reduce_kernel<real>
),
81
(*
j
), 1024, 0 , (
hipStream_t
) glb_cmd_queue,
82
proj_bufred_d
,
glsc3_nb
, *
j
);
83
HIP_CHECK
(
hipGetLastError
());
84
HIP_CHECK
(
hipMemcpyAsync
(alpha,
proj_bufred_d
, (*
j
) *
sizeof
(
real
),
85
hipMemcpyDeviceToDevice
,
86
(
hipStream_t
) glb_cmd_queue));
87
HIP_CHECK
(
hipMemsetAsync
(xbar, 0, (*n) *
sizeof
(
real
)));
88
89
HIP_CHECK
(
hipStreamSynchronize
((
hipStream_t
) glb_cmd_queue));
90
device_mpi_allreduce_inplace
(alpha, (*
j
),
sizeof
(
real
),
DEVICE_MPI_SUM
);
91
92
const
dim3
vec_nthrds
(1024, 1, 1);
93
const
dim3
vec_nblcks
(((*n)+1024 - 1)/ 1024, 1, 1);
94
95
/* First vector operation block */
96
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
project_on_vec_kernel<real>
),
97
vec_nblcks
,
vec_nthrds
,
98
0, (
hipStream_t
) glb_cmd_queue, (
real
*) xbar,
99
(
const
real
**) xx, (
real
*) b, (
const
real
**) bb,
100
(
const
real
*) alpha, *
j
, *n);
101
/* Second glsc3_many call */
102
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_many_kernel<real>
),
103
glsc3_nblcks
,
glsc3_nthrds
,
104
0, (
hipStream_t
) glb_cmd_queue,
105
(
const
real
*) b, (
const
real
**) xx,
106
(
const
real
*) mult,
proj_bufred_d
, *
j
, *n);
107
HIP_CHECK
(
hipGetLastError
());
108
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_reduce_kernel<real>
),
109
(*
j
), 1024, 0, (
hipStream_t
) glb_cmd_queue,
110
proj_bufred_d
,
glsc3_nb
, *
j
);
111
HIP_CHECK
(
hipGetLastError
());
112
HIP_CHECK
(
hipMemcpyAsync
(alpha,
proj_bufred_d
, (*
j
) *
sizeof
(
real
),
113
hipMemcpyDeviceToDevice
,
114
(
hipStream_t
) glb_cmd_queue));
115
116
HIP_CHECK
(
hipStreamSynchronize
((
hipStream_t
) glb_cmd_queue));
117
device_mpi_allreduce_inplace
(alpha, (*
j
),
sizeof
(
real
),
DEVICE_MPI_SUM
);
118
119
/* Second vector operation block */
120
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
project_on_vec_kernel<real>
),
121
vec_nblcks
,
vec_nthrds
,
122
0, (
hipStream_t
) glb_cmd_queue, (
real
*) xbar,
123
(
const
real
**) xx, (
real
*) b, (
const
real
**) bb,
124
(
const
real
*) alpha, *
j
, *n);
125
}
126
127
void
hip_project_ortho
(
void
*alpha,
void
* b,
void
*xx,
void
*bb,
128
void
*
w
,
void
*
xm
,
int
*
j
,
int
*n,
real
*
nrm
){
129
130
int
pow2
= 1;
131
while
(
pow2
< (*
j
)){
132
pow2
= 2*
pow2
;
133
}
134
const
int
nt = 1024/
pow2
;
135
const
dim3
glsc3_nthrds
(
pow2
, nt, 1);
136
const
dim3
glsc3_nblcks
(((*n)+nt - 1)/nt, 1, 1);
137
const
int
glsc3_nb
= ((*n) + nt - 1)/nt;
138
if
((*
j
)*
glsc3_nb
>
proj_red_s
){
139
proj_red_s
= (*j)*
glsc3_nb
;
140
if
(
proj_bufred_d
!=
NULL
) {
141
HIP_CHECK
(
hipFree
(
proj_bufred_d
));
142
}
143
HIP_CHECK
(
hipMalloc
(&
proj_bufred_d
, (*
j
)*
glsc3_nb
*
sizeof
(
real
)));
144
}
145
146
/* First glsc3_many call */
147
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_many_kernel<real>
),
148
glsc3_nblcks
,
glsc3_nthrds
,
149
0, (
hipStream_t
) glb_cmd_queue,
150
(
const
real
*) b, (
const
real
**) xx,
151
(
const
real
*)
w
,
proj_bufred_d
, *
j
, *n);
152
HIP_CHECK
(
hipGetLastError
());
153
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_reduce_kernel<real>
),
154
(*
j
), 1024, 0 , (
hipStream_t
) glb_cmd_queue,
155
proj_bufred_d
,
glsc3_nb
, *
j
);
156
HIP_CHECK
(
hipGetLastError
());
157
HIP_CHECK
(
hipMemcpyAsync
(alpha,
proj_bufred_d
, (*
j
) *
sizeof
(
real
),
158
hipMemcpyDeviceToDevice
,
159
(
hipStream_t
) glb_cmd_queue));
160
161
HIP_CHECK
(
hipStreamSynchronize
((
hipStream_t
) glb_cmd_queue));
162
device_mpi_allreduce_inplace
(alpha, (*
j
),
sizeof
(
real
),
DEVICE_MPI_SUM
);
163
164
HIP_CHECK
(
hipMemcpyAsync
(
nrm
, (
real
*) alpha + (*
j
- 1),
165
sizeof
(
real
),
hipMemcpyDeviceToHost
,
166
(
hipStream_t
) glb_cmd_queue));
167
(*nrm) =
sqrt
(*
nrm
);
168
169
170
const
dim3
vec_nthrds
(1024, 1, 1);
171
const
dim3
vec_nblcks
(((*n)+1024 - 1)/ 1024, 1, 1);
172
173
/* First vector operation block */
174
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
project_ortho_vec_kernel<real>
),
175
vec_nblcks
,
vec_nthrds
, 0, (
hipStream_t
) glb_cmd_queue,
176
(
real
*)
xm
, (
const
real
**) xx,
177
(
real
*) b, (
const
real
**) bb,
178
(
const
real
*) alpha, *
j
, *n);
179
180
/* Second glsc3_many call */
181
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_many_kernel<real>
),
182
glsc3_nblcks
,
glsc3_nthrds
, 0, (
hipStream_t
) glb_cmd_queue,
183
(
const
real
*) b, (
const
real
**) xx,
184
(
const
real
*)
w
,
proj_bufred_d
, *
j
, *n);
185
HIP_CHECK
(
hipGetLastError
());
186
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
glsc3_reduce_kernel<real>
),
187
(*
j
), 1024, 0 , (
hipStream_t
) glb_cmd_queue,
188
proj_bufred_d
,
glsc3_nb
, *
j
);
189
HIP_CHECK
(
hipGetLastError
());
190
HIP_CHECK
(
hipMemcpyAsync
(alpha,
proj_bufred_d
, (*
j
) *
sizeof
(
real
),
191
hipMemcpyDeviceToDevice
,
192
(
hipStream_t
) glb_cmd_queue));
193
194
HIP_CHECK
(
hipStreamSynchronize
((
hipStream_t
) glb_cmd_queue));
195
device_mpi_allreduce_inplace
(alpha, (*
j
),
sizeof
(
real
),
DEVICE_MPI_SUM
);
196
197
/* Second vector operation block */
198
hipLaunchKernelGGL
(
HIP_KERNEL_NAME
(
project_ortho_vec_kernel<real>
),
199
vec_nblcks
,
vec_nthrds
, 0, (
hipStream_t
) glb_cmd_queue,
200
(
real
*)
xm
, (
const
real
**) xx,
201
(
real
*) b, (
const
real
**) bb,
202
(
const
real
*) alpha, *
j
, *n);
203
204
}
205
206
}
207
w
__global__ void T *__restrict__ T *__restrict__ const T *__restrict__ const T *__restrict__ const T *__restrict__ w
Definition
ax_helm_full_kernel.h:48
j
const int j
Definition
ax_helm_full_kernel.h:94
dirichlet_apply_scalar_kernel
__global__ void dirichlet_apply_scalar_kernel(const int *__restrict__ msk, T *__restrict__ x, const T g, const int m)
Definition
dirichlet_kernel.h:42
device_config.h
real
double real
Definition
device_config.h:12
device_mpi_op.h
DEVICE_MPI_SUM
#define DEVICE_MPI_SUM
Definition
device_mpi_op.h:9
device_mpi_allreduce_inplace
void device_mpi_allreduce_inplace(void *buf_d, int count, int nbytes, int op)
Definition
device_mpi_reduce.c:74
device_mpi_reduce.h
check.h
HIP_CHECK
#define HIP_CHECK(err)
Definition
check.h:8
math_kernel.h
projection_kernel.h
hip_project_ortho
void hip_project_ortho(void *alpha, void *b, void *xx, void *bb, void *w, void *xm, int *j, int *n, real *nrm)
Definition
projection.hip:127
hip_project_on
void hip_project_on(void *alpha, void *b, void *xx, void *bb, void *mult, void *xbar, int *j, int *n)
Definition
projection.hip:54
proj_red_s
int proj_red_s
Definition
projection.hip:46
proj_bufred_d
real * proj_bufred_d
Definition
projection.hip:47
src
common
bcknd
device
hip
projection.hip
Generated on Mon Dec 23 2024 03:38:13 for Neko by
1.9.8