Darknet/YOLO v3.0-208-g0b6f60f-dirty
Object Detection Framework
 
Loading...
Searching...
No Matches
dark_cuda.hpp File Reference
Include dependency graph for dark_cuda.hpp:
This graph shows which files directly or indirectly include this file:

Macros

#define BLOCK   512
 
#define BLOCK_TRANSPOSE32   256
 
#define CHECK_CUBLAS(X)   cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );
 
#define CHECK_CUDA(X)   check_cuda_error_extended(X, __FILE__, __func__, __LINE__ );
 
#define CHECK_CUDNN(X)   cudnn_check_error_extended(X, __FILE__, __func__, __LINE__);
 
#define FULL_MASK   0xffffffff
 
#define WARP_SIZE   32
 

Enumerations

enum  {
  cudnn_fastest ,
  cudnn_smallest ,
  cudnn_specify
}
 

Functions

cublasHandle_t blas_handle ()
 
void check_cuda_error (cudaError_t status, const char *const filename, const char *const funcname, const int line)
 
void check_cuda_error_extended (cudaError_t status, const char *const filename, const char *const funcname, const int line)
 
void cublas_check_error_extended (cublasStatus_t status, const char *const filename, const char *const funcname, const int line)
 
float cuda_compare (float *x_gpu, float *x, size_t n, char *s)
 
void cuda_free (float *x_gpu)
 
void cuda_free_host (float *x_cpu)
 
int cuda_get_device ()
 
dim3 cuda_gridsize (size_t n)
 
float * cuda_make_array (float *x, size_t n)
 Allocate memory on the GPU.
 
float * cuda_make_array_pinned (float *x, size_t n)
 
float * cuda_make_array_pinned_preallocated (float *x, size_t n)
 
void ** cuda_make_array_pointers (void **x, size_t n)
 
int * cuda_make_int_array (size_t n)
 
int * cuda_make_int_array_new_api (int *x, size_t n)
 
void cuda_push_array (float *x_gpu, float *x, size_t n)
 
void cuda_random (float *x_gpu, size_t n)
 
void cudnn_check_error_extended (cudnnStatus_t status, const char *const filename, const char *const function, const int line)
 
cudnnHandle_t cudnn_handle ()
 
void free_pinned_memory ()
 
cudaStream_t get_cuda_stream ()
 
int get_gpu_compute_capability (int i, char *device_name)
 
int get_number_of_blocks (int array_size, int block_size)
 
void pre_allocate_pinned_memory (size_t size)
 
void reset_wait_stream_events ()
 
void show_cuda_cudnn_info ()
 
cudaStream_t switch_stream (int i)
 
void wait_stream (int i)
 

Variables

int cuda_debug_sync
 

Macro Definition Documentation

◆ BLOCK

#define BLOCK   512
Todo:
Document what BLOCK represents; check how it is used across all the .cu files.

◆ BLOCK_TRANSPOSE32

#define BLOCK_TRANSPOSE32   256

◆ CHECK_CUBLAS

#define CHECK_CUBLAS (   X)    cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );

◆ CHECK_CUDA

#define CHECK_CUDA (   X)    check_cuda_error_extended(X, __FILE__, __func__, __LINE__ );

◆ CHECK_CUDNN

#define CHECK_CUDNN (   X)    cudnn_check_error_extended(X, __FILE__, __func__, __LINE__);

◆ FULL_MASK

#define FULL_MASK   0xffffffff

◆ WARP_SIZE

#define WARP_SIZE   32

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
Enumerator
cudnn_fastest 
cudnn_smallest 
cudnn_specify 

Function Documentation

◆ blas_handle()

cublasHandle_t blas_handle ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_cuda_error()

void check_cuda_error ( cudaError_t  status,
const char *const  filename,
const char *const  funcname,
const int  line 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_cuda_error_extended()

void check_cuda_error_extended ( cudaError_t  status,
const char *const  filename,
const char *const  funcname,
const int  line 
)
Here is the call graph for this function:

◆ cublas_check_error_extended()

void cublas_check_error_extended ( cublasStatus_t  status,
const char *const  filename,
const char *const  funcname,
const int  line 
)
Here is the call graph for this function:

◆ cuda_compare()

float cuda_compare ( float *  x_gpu,
float *  x,
size_t  n,
char *  s 
)
Here is the call graph for this function:

◆ cuda_free()

void cuda_free ( float *  x_gpu)

◆ cuda_free_host()

void cuda_free_host ( float *  x_cpu)
Here is the caller graph for this function:

◆ cuda_get_device()

int cuda_get_device ( )
Here is the caller graph for this function:

◆ cuda_gridsize()

dim3 cuda_gridsize ( size_t  n)

◆ cuda_make_array()

float * cuda_make_array ( float *  x,
size_t  n 
)

Allocate memory on the GPU.

If x is not null, the floats it points to in host memory are copied into the newly allocated GPU memory.

Returns
a pointer to the CUDA memory allocation.
Warning
The copy is asynchronous and may not have finished when this function returns!
Here is the call graph for this function:

◆ cuda_make_array_pinned()

float * cuda_make_array_pinned ( float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_array_pinned_preallocated()

float * cuda_make_array_pinned_preallocated ( float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_array_pointers()

void ** cuda_make_array_pointers ( void **  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_int_array()

int * cuda_make_int_array ( size_t  n)
Here is the caller graph for this function:

◆ cuda_make_int_array_new_api()

int * cuda_make_int_array_new_api ( int *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_push_array()

void cuda_push_array ( float *  x_gpu,
float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_random()

void cuda_random ( float *  x_gpu,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cudnn_check_error_extended()

void cudnn_check_error_extended ( cudnnStatus_t  status,
const char *const  filename,
const char *const  function,
const int  line 
)
Here is the call graph for this function:

◆ cudnn_handle()

cudnnHandle_t cudnn_handle ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ free_pinned_memory()

void free_pinned_memory ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_cuda_stream()

cudaStream_t get_cuda_stream ( )
Here is the call graph for this function:

◆ get_gpu_compute_capability()

int get_gpu_compute_capability ( int  i,
char *  device_name 
)
Here is the caller graph for this function:

◆ get_number_of_blocks()

int get_number_of_blocks ( int  array_size,
int  block_size 
)

◆ pre_allocate_pinned_memory()

void pre_allocate_pinned_memory ( size_t  size)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_wait_stream_events()

void reset_wait_stream_events ( )
Here is the caller graph for this function:

◆ show_cuda_cudnn_info()

void show_cuda_cudnn_info ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ switch_stream()

cudaStream_t switch_stream ( int  i)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ wait_stream()

void wait_stream ( int  i)
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ cuda_debug_sync

int cuda_debug_sync
extern
Todo:
V3: is this still needed?
Todo:
V3: is cuda_debug_sync still necessary?