Macros | |
#define | BLOCK 512 |
#define | BLOCK_TRANSPOSE32 256 |
#define | CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ ); |
#define | CHECK_CUDA(X) check_cuda_error_extended(X, __FILE__, __func__, __LINE__ ); |
#define | CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__, __func__, __LINE__); |
#define | FULL_MASK 0xffffffff |
#define | WARP_SIZE 32 |
Enumerations | |
enum | { cudnn_fastest, cudnn_smallest, cudnn_specify } |
Functions | |
cublasHandle_t | blas_handle () |
void | check_cuda_error (cudaError_t status, const char *const filename, const char *const funcname, const int line) |
void | check_cuda_error_extended (cudaError_t status, const char *const filename, const char *const funcname, const int line) |
void | cublas_check_error_extended (cublasStatus_t status, const char *const filename, const char *const funcname, const int line) |
float | cuda_compare (float *x_gpu, float *x, size_t n, char *s) |
void | cuda_free (float *x_gpu) |
void | cuda_free_host (float *x_cpu) |
int | cuda_get_device () |
dim3 | cuda_gridsize (size_t n) |
float * | cuda_make_array (float *x, size_t n) |
Allocate memory on the GPU. | |
float * | cuda_make_array_pinned (float *x, size_t n) |
float * | cuda_make_array_pinned_preallocated (float *x, size_t n) |
void ** | cuda_make_array_pointers (void **x, size_t n) |
int * | cuda_make_int_array (size_t n) |
int * | cuda_make_int_array_new_api (int *x, size_t n) |
void | cuda_push_array (float *x_gpu, float *x, size_t n) |
void | cuda_random (float *x_gpu, size_t n) |
void | cudnn_check_error_extended (cudnnStatus_t status, const char *const filename, const char *const function, const int line) |
cudnnHandle_t | cudnn_handle () |
void | free_pinned_memory () |
cudaStream_t | get_cuda_stream () |
int | get_gpu_compute_capability (int i, char *device_name) |
int | get_number_of_blocks (int array_size, int block_size) |
void | pre_allocate_pinned_memory (size_t size) |
void | reset_wait_stream_events () |
void | show_cuda_cudnn_info () |
cudaStream_t | switch_stream (int i) |
void | wait_stream (int i) |
Variables | |
int | cuda_debug_sync |
#define BLOCK 512 |
#define BLOCK_TRANSPOSE32 256 |
#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );
#define CHECK_CUDA(X) check_cuda_error_extended(X, __FILE__, __func__, __LINE__ );
#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__, __func__, __LINE__);
#define FULL_MASK 0xffffffff |
#define WARP_SIZE 32 |
cublasHandle_t blas_handle()
void check_cuda_error(cudaError_t status, const char *const filename, const char *const funcname, const int line)
void check_cuda_error_extended(cudaError_t status, const char *const filename, const char *const funcname, const int line)
void cublas_check_error_extended(cublasStatus_t status, const char *const filename, const char *const funcname, const int line)
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
void cuda_free(float *x_gpu)
void cuda_free_host(float *x_cpu)
int cuda_get_device()
dim3 cuda_gridsize(size_t n)
float *cuda_make_array(float *x, size_t n)
Allocate memory on the GPU. If x is not null, then copy the given floats from the host pointer.
float *cuda_make_array_pinned(float *x, size_t n)
float *cuda_make_array_pinned_preallocated(float *x, size_t n)
void **cuda_make_array_pointers(void **x, size_t n)
int *cuda_make_int_array(size_t n)
int *cuda_make_int_array_new_api(int *x, size_t n)
void cuda_push_array(float *x_gpu, float *x, size_t n)
void cuda_random(float *x_gpu, size_t n)
void cudnn_check_error_extended(cudnnStatus_t status, const char *const filename, const char *const function, const int line)
cudnnHandle_t cudnn_handle()
void free_pinned_memory()
cudaStream_t get_cuda_stream()
int get_gpu_compute_capability(int i, char *device_name)
int get_number_of_blocks(int array_size, int block_size)
void pre_allocate_pinned_memory(size_t size)
void reset_wait_stream_events()
void show_cuda_cudnn_info()
cudaStream_t switch_stream(int i)
void wait_stream(int i)