Macros | |
#define | cudaEventWaitDefault 0x00 |
Functions | |
cublasHandle_t | blas_handle () |
void | check_cuda_error (cudaError_t status, const char *const filename, const char *const funcname, const int line) |
void | check_cuda_error_extended (cudaError_t status, const char *const filename, const char *const funcname, const int line) |
void | cublas_check_error (cublasStatus_t status) |
void | cublas_check_error_extended (cublasStatus_t status, const char *const filename, const char *const function, const int line) |
float | cuda_compare (float *x_gpu, float *x, size_t n, char *s) |
void | cuda_free (float *x_gpu) |
void | cuda_free_host (float *x_cpu) |
void * | cuda_get_context () |
int | cuda_get_device () |
dim3 | cuda_gridsize (size_t n) |
float * | cuda_make_array (float *x, size_t n) |
Allocate memory on the GPU. | |
float * | cuda_make_array_pinned (float *x, size_t n) |
float * | cuda_make_array_pinned_preallocated (float *x, size_t n) |
void ** | cuda_make_array_pointers (void **x, size_t n) |
int * | cuda_make_int_array (size_t n) |
int * | cuda_make_int_array_new_api (int *x, size_t n) |
void | cuda_pull_array (float *x_gpu, float *x, size_t n) |
void | cuda_pull_array_async (float *x_gpu, float *x, size_t n) |
void | cuda_push_array (float *x_gpu, float *x, size_t n) |
void | cuda_random (float *x_gpu, size_t n) |
void | cuda_set_device (int n) |
This is part of the original C API. This function does nothing when Darknet was built to run on the CPU. | |
void | cudnn_check_error (cudnnStatus_t status, const char *const filename, const char *const function, const int line) |
void | cudnn_check_error_extended (cudnnStatus_t status, const char *const filename, const char *const function, const int line) |
cudnnHandle_t | cudnn_handle () |
void | free_pinned_memory () |
cudaStream_t | get_cuda_stream () |
int | get_gpu_compute_capability (int i, char *device_name) |
int | get_number_of_blocks (int array_size, int block_size) |
void | pre_allocate_pinned_memory (const size_t size) |
void | reset_wait_stream_events () |
void | show_cuda_cudnn_info () |
cudaStream_t | switch_stream (int i) |
void | wait_stream (int i) |
Variables | |
static cublasHandle_t | blasHandle [16] |
static int | blasInit [16] = { 0 } |
int | cuda_debug_sync = 0 |
static cudnnHandle_t | cudnnHandle [16] |
static int | cudnnInit [16] = { 0 } |
static volatile int | event_counter = 0 |
static const int | max_events = 1024 |
static int | streamInit [16] = { 0 } |
static cudaStream_t | streamsArray [16] |
static cudnnHandle_t | switchCudnnHandle [16] |
static int | switchCudnnInit [16] |
static cudaEvent_t | switchEventsArray [1024] |
static int | switchStreamInit [16] = { 0 } |
static cudaStream_t | switchStreamsArray [16] |
#define cudaEventWaitDefault 0x00 |
cublasHandle_t blas_handle | ( | ) |
void check_cuda_error | ( | cudaError_t | status, |
const char *const | filename, | ||
const char *const | funcname, | ||
const int | line | ||
) |
void check_cuda_error_extended | ( | cudaError_t | status, |
const char *const | filename, | ||
const char *const | funcname, | ||
const int | line | ||
) |
void cublas_check_error | ( | cublasStatus_t | status | ) |
void cublas_check_error_extended | ( | cublasStatus_t | status, |
const char *const | filename, | ||
const char *const | function, | ||
const int | line | ||
) |
float cuda_compare | ( | float * | x_gpu, |
float * | x, | ||
size_t | n, | ||
char * | s | ||
) |
void cuda_free | ( | float * | x_gpu | ) |
void cuda_free_host | ( | float * | x_cpu | ) |
void * cuda_get_context | ( | ) |
int cuda_get_device | ( | ) |
dim3 cuda_gridsize | ( | size_t | n | ) |
float * cuda_make_array | ( | float * | x, |
size_t | n | ||
) |
Allocate memory on the GPU. If `x` is not null, then copy the given floats from the host pointer.
float * cuda_make_array_pinned | ( | float * | x, |
size_t | n | ||
) |
float * cuda_make_array_pinned_preallocated | ( | float * | x, |
size_t | n | ||
) |
void ** cuda_make_array_pointers | ( | void ** | x, |
size_t | n | ||
) |
int * cuda_make_int_array | ( | size_t | n | ) |
int * cuda_make_int_array_new_api | ( | int * | x, |
size_t | n | ||
) |
void cuda_pull_array | ( | float * | x_gpu, |
float * | x, | ||
size_t | n | ||
) |
void cuda_pull_array_async | ( | float * | x_gpu, |
float * | x, | ||
size_t | n | ||
) |
void cuda_push_array | ( | float * | x_gpu, |
float * | x, | ||
size_t | n | ||
) |
void cuda_random | ( | float * | x_gpu, |
size_t | n | ||
) |
void cuda_set_device | ( | int | n | ) |
This is part of the original C API. This function does nothing when Darknet was built to run on the CPU.
void cudnn_check_error | ( | cudnnStatus_t | status, |
const char *const | filename, | ||
const char *const | function, | ||
const int | line | ||
) |
void cudnn_check_error_extended | ( | cudnnStatus_t | status, |
const char *const | filename, | ||
const char *const | function, | ||
const int | line | ||
) |
cudnnHandle_t cudnn_handle | ( | ) |
void free_pinned_memory | ( | ) |
cudaStream_t get_cuda_stream | ( | ) |
int get_gpu_compute_capability | ( | int | i, |
char * | device_name | ||
) |
int get_number_of_blocks | ( | int | array_size, |
int | block_size | ||
) |
void pre_allocate_pinned_memory | ( | const size_t | size | ) |
void reset_wait_stream_events | ( | ) |
void show_cuda_cudnn_info | ( | ) |
cudaStream_t switch_stream | ( | int | i | ) |
void wait_stream | ( | int | i | ) |
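static cublasHandle_t blasHandle [16] |
static int blasInit [16] = { 0 } |
int cuda_debug_sync = 0 |
static cudnnHandle_t cudnnHandle [16] |
static int cudnnInit [16] = { 0 } |
static volatile int event_counter = 0 |
static const int max_events = 1024 |
static int streamInit [16] = { 0 } |
static cudaStream_t streamsArray [16] |
static cudnnHandle_t switchCudnnHandle [16] |
static int switchCudnnInit [16] |
static cudaEvent_t switchEventsArray [1024] |
static int switchStreamInit [16] = { 0 } |
static cudaStream_t switchStreamsArray [16] |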