|
template<typename T > |
__global__ void | gather_kernel_add (T *__restrict__ v, const int m, const int o, const int *__restrict__ dg, const T *__restrict__ u, const int n, const int *__restrict__ gd, const int nb, const int *__restrict__ b, const int *__restrict__ bo) |
|
template<typename T > |
__global__ void | gather_kernel_mul (T *__restrict__ v, const int m, const int o, const int *__restrict__ dg, const T *__restrict__ u, const int n, const int *__restrict__ gd, const int nb, const int *__restrict__ b, const int *__restrict__ bo) |
|
template<typename T > |
__global__ void | gather_kernel_min (T *__restrict__ v, const int m, const int o, const int *__restrict__ dg, const T *__restrict__ u, const int n, const int *__restrict__ gd, const int nb, const int *__restrict__ b, const int *__restrict__ bo) |
|
template<typename T > |
__global__ void | gather_kernel_max (T *__restrict__ v, const int m, const int o, const int *__restrict__ dg, const T *__restrict__ u, const int n, const int *__restrict__ gd, const int nb, const int *__restrict__ b, const int *__restrict__ bo) |
|
template<typename T > |
__global__ void | scatter_kernel (T *__restrict__ v, const int m, const int *__restrict__ dg, T *__restrict__ u, const int n, const int *__restrict__ gd, const int nb, const int *__restrict__ b, const int *__restrict__ bo) |
|
template<typename T > |
__global__ void | gs_pack_kernel (const T *__restrict__ u, T *__restrict__ buf, const int32_t *__restrict__ dof, const int n) |
|
template<typename T > |
__global__ void | gs_unpack_add_kernel (T *__restrict__ u, const T *__restrict__ buf, const int32_t *__restrict__ dof, const int n) |
|
template<typename T > |
__device__ T | atomicMinFloat (T *address, T val) |
|
template<> |
__device__ float | atomicMinFloat< float > (float *address, float val) |
|
template<> |
__device__ double | atomicMinFloat< double > (double *address, double val) |
|
template<typename T > |
__global__ void | gs_unpack_min_kernel (T *__restrict__ u, const T *__restrict__ buf, const int32_t *__restrict__ dof, const int n) |
|
template<typename T > |
__device__ T | atomicMaxFloat (T *address, T val) |
|
template<> |
__device__ float | atomicMaxFloat< float > (float *address, float val) |
|
template<> |
__device__ double | atomicMaxFloat< double > (double *address, double val) |
|
template<typename T > |
__global__ void | gs_unpack_max_kernel (T *__restrict__ u, const T *__restrict__ buf, const int32_t *__restrict__ dof, const int n) |
|