Darknet/YOLO v3.0-208-g0b6f60f-dirty
Object Detection Framework
 
Loading...
Searching...
No Matches
dark_cuda.cpp File Reference
Include dependency graph for dark_cuda.cpp:

Macros

#define cudaEventWaitDefault   0x00
 

Functions

cublasHandle_t blas_handle ()
 
void check_cuda_error (cudaError_t status, const char *const filename, const char *const funcname, const int line)
 
void check_cuda_error_extended (cudaError_t status, const char *const filename, const char *const funcname, const int line)
 
void cublas_check_error (cublasStatus_t status)
 
void cublas_check_error_extended (cublasStatus_t status, const char *const filename, const char *const function, const int line)
 
float cuda_compare (float *x_gpu, float *x, size_t n, char *s)
 
void cuda_free (float *x_gpu)
 
void cuda_free_host (float *x_cpu)
 
void * cuda_get_context ()
 
int cuda_get_device ()
 
dim3 cuda_gridsize (size_t n)
 
float * cuda_make_array (float *x, size_t n)
 Allocate memory on the GPU.
 
float * cuda_make_array_pinned (float *x, size_t n)
 
float * cuda_make_array_pinned_preallocated (float *x, size_t n)
 
void ** cuda_make_array_pointers (void **x, size_t n)
 
int * cuda_make_int_array (size_t n)
 
int * cuda_make_int_array_new_api (int *x, size_t n)
 
void cuda_pull_array (float *x_gpu, float *x, size_t n)
 
void cuda_pull_array_async (float *x_gpu, float *x, size_t n)
 
void cuda_push_array (float *x_gpu, float *x, size_t n)
 
void cuda_random (float *x_gpu, size_t n)
 
void cuda_set_device (int n)
 This is part of the original C API. This function does nothing when Darknet has been built to run on the CPU.
 
void cudnn_check_error (cudnnStatus_t status, const char *const filename, const char *const function, const int line)
 
void cudnn_check_error_extended (cudnnStatus_t status, const char *const filename, const char *const function, const int line)
 
cudnnHandle_t cudnn_handle ()
 
void free_pinned_memory ()
 
cudaStream_t get_cuda_stream ()
 
int get_gpu_compute_capability (int i, char *device_name)
 
int get_number_of_blocks (int array_size, int block_size)
 
void pre_allocate_pinned_memory (const size_t size)
 
void reset_wait_stream_events ()
 
void show_cuda_cudnn_info ()
 
cudaStream_t switch_stream (int i)
 
void wait_stream (int i)
 

Variables

static cublasHandle_t blasHandle [16]
 
static int blasInit [16] = { 0 }
 
int cuda_debug_sync = 0
 
static cudnnHandle_t cudnnHandle [16]
 
static int cudnnInit [16] = { 0 }
 
static volatile int event_counter = 0
 
static const int max_events = 1024
 
static int streamInit [16] = { 0 }
 
static cudaStream_t streamsArray [16]
 
static cudnnHandle_t switchCudnnHandle [16]
 
static int switchCudnnInit [16]
 
static cudaEvent_t switchEventsArray [1024]
 
static int switchStreamInit [16] = { 0 }
 
static cudaStream_t switchStreamsArray [16]
 

Macro Definition Documentation

◆ cudaEventWaitDefault

#define cudaEventWaitDefault   0x00

Function Documentation

◆ blas_handle()

cublasHandle_t blas_handle ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_cuda_error()

void check_cuda_error ( cudaError_t  status,
const char *const  filename,
const char *const  funcname,
const int  line 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_cuda_error_extended()

void check_cuda_error_extended ( cudaError_t  status,
const char *const  filename,
const char *const  funcname,
const int  line 
)
Here is the call graph for this function:

◆ cublas_check_error()

void cublas_check_error ( cublasStatus_t  status)
Here is the caller graph for this function:

◆ cublas_check_error_extended()

void cublas_check_error_extended ( cublasStatus_t  status,
const char *const  filename,
const char *const  function,
const int  line 
)
Here is the call graph for this function:

◆ cuda_compare()

float cuda_compare ( float *  x_gpu,
float *  x,
size_t  n,
char *  s 
)
Here is the call graph for this function:

◆ cuda_free()

void cuda_free ( float *  x_gpu)

◆ cuda_free_host()

void cuda_free_host ( float *  x_cpu)
Here is the caller graph for this function:

◆ cuda_get_context()

void * cuda_get_context ( )

◆ cuda_get_device()

int cuda_get_device ( )
Here is the caller graph for this function:

◆ cuda_gridsize()

dim3 cuda_gridsize ( size_t  n)

◆ cuda_make_array()

float * cuda_make_array ( float *  x,
size_t  n 
)

Allocate memory on the GPU.

If x is not null, the floats at the given host pointer are copied into the new GPU allocation.

Returns
a pointer to the CUDA memory allocation.
Warning
The copy is asynchronous and may not have finished when this function returns!
Here is the call graph for this function:

◆ cuda_make_array_pinned()

float * cuda_make_array_pinned ( float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_array_pinned_preallocated()

float * cuda_make_array_pinned_preallocated ( float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_array_pointers()

void ** cuda_make_array_pointers ( void **  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_make_int_array()

int * cuda_make_int_array ( size_t  n)
Here is the caller graph for this function:

◆ cuda_make_int_array_new_api()

int * cuda_make_int_array_new_api ( int *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_pull_array()

void cuda_pull_array ( float *  x_gpu,
float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_pull_array_async()

void cuda_pull_array_async ( float *  x_gpu,
float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_push_array()

void cuda_push_array ( float *  x_gpu,
float *  x,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_random()

void cuda_random ( float *  x_gpu,
size_t  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cuda_set_device()

void cuda_set_device ( int  n)

This is part of the original C API. This function does nothing when Darknet has been built to run on the CPU.

Here is the caller graph for this function:

◆ cudnn_check_error()

void cudnn_check_error ( cudnnStatus_t  status,
const char *const  filename,
const char *const  function,
const int  line 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cudnn_check_error_extended()

void cudnn_check_error_extended ( cudnnStatus_t  status,
const char *const  filename,
const char *const  function,
const int  line 
)
Here is the call graph for this function:

◆ cudnn_handle()

cudnnHandle_t cudnn_handle ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ free_pinned_memory()

void free_pinned_memory ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_cuda_stream()

cudaStream_t get_cuda_stream ( )
Here is the call graph for this function:

◆ get_gpu_compute_capability()

int get_gpu_compute_capability ( int  i,
char *  device_name 
)
Here is the caller graph for this function:

◆ get_number_of_blocks()

int get_number_of_blocks ( int  array_size,
int  block_size 
)

◆ pre_allocate_pinned_memory()

void pre_allocate_pinned_memory ( const size_t  size)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_wait_stream_events()

void reset_wait_stream_events ( )
Here is the caller graph for this function:

◆ show_cuda_cudnn_info()

void show_cuda_cudnn_info ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ switch_stream()

cudaStream_t switch_stream ( int  i)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ wait_stream()

void wait_stream ( int  i)
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ blasHandle

cublasHandle_t blasHandle[16]
static

◆ blasInit

int blasInit[16] = { 0 }
static

◆ cuda_debug_sync

int cuda_debug_sync = 0
Todo:
V3: Is cuda_debug_sync still necessary?

◆ cudnnHandle

cudnnHandle_t cudnnHandle[16]
static

◆ cudnnInit

int cudnnInit[16] = { 0 }
static

◆ event_counter

volatile int event_counter = 0
static

◆ max_events

const int max_events = 1024
static

◆ streamInit

int streamInit[16] = { 0 }
static

◆ streamsArray

cudaStream_t streamsArray[16]
static

◆ switchCudnnHandle

cudnnHandle_t switchCudnnHandle[16]
static

◆ switchCudnnInit

int switchCudnnInit[16]
static

◆ switchEventsArray

cudaEvent_t switchEventsArray[1024]
static

◆ switchStreamInit

int switchStreamInit[16] = { 0 }
static

◆ switchStreamsArray

cudaStream_t switchStreamsArray[16]
static