Functions | |
void | convolve_bin_gpu (float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad, int new_lda, float *mean_arr_gpu) |
void | convolve_gpu (float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad) |
void | fill_int8_gpu (unsigned char *src, unsigned char val, size_t size) |
void | float_to_bit_gpu (float *src, unsigned char *dst, size_t size) |
void | gemm_nn_custom_bin_mean_transposed_gpu (int M, int N, int K, unsigned char *A, int lda, unsigned char *B, int ldb, float *C, int ldc, float *mean_arr, float *bias, int leaky_activation, float *shortcut_in_gpu, float *shortcut_out_gpu) |
void | im2col_align_bin_ongpu (float *im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align) |
void | im2col_align_ongpu (float *im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align) |
void | im2col_cpu (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col) |
void | im2col_cpu_ext (const float *data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, float *data_col) |
float | im2col_get_pixel (float *im, int height, int width, int channels, int row, int col, int channel, int pad) |
void | im2col_gpu_ext (const float *data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, float *data_col) |
void | im2col_ongpu (float *im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col) |
void | repack_input_gpu (float *input, float *re_packed_input, int w, int h, int c) |
void | repack_input_gpu_bin (float *input, uint32_t *re_packed_input_bin, int w, int h, int c) |
void | transpose_bin_gpu (unsigned char *A, unsigned char *B, const int n, const int m, const int lda, const int ldb, const int block_size) |
void | transpose_uint32_gpu (uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) |
void convolve_bin_gpu(
    float *input,
    float *weights,
    float *output,
    int in_w,
    int in_h,
    int in_c,
    int n,
    int size,
    int pad,
    int new_lda,
    float *mean_arr_gpu
)
void convolve_gpu(
    float *input,
    float *weights,
    float *output,
    int in_w,
    int in_h,
    int in_c,
    int n,
    int size,
    int pad
)
void fill_int8_gpu(
    unsigned char *src,
    unsigned char val,
    size_t size
)
void float_to_bit_gpu(
    float *src,
    unsigned char *dst,
    size_t size
)
void gemm_nn_custom_bin_mean_transposed_gpu(
    int M,
    int N,
    int K,
    unsigned char *A,
    int lda,
    unsigned char *B,
    int ldb,
    float *C,
    int ldc,
    float *mean_arr,
    float *bias,
    int leaky_activation,
    float *shortcut_in_gpu,
    float *shortcut_out_gpu
)
void im2col_align_bin_ongpu(
    float *im,
    int channels,
    int height,
    int width,
    int ksize,
    int stride,
    int pad,
    float *data_col,
    int bit_align
)
void im2col_align_ongpu(
    float *im,
    int channels,
    int height,
    int width,
    int ksize,
    int stride,
    int pad,
    float *data_col,
    int bit_align
)
void im2col_cpu(
    float *data_im,
    int channels,
    int height,
    int width,
    int ksize,
    int stride,
    int pad,
    float *data_col
)
void im2col_cpu_ext(
    const float *data_im,
    const int channels,
    const int height,
    const int width,
    const int kernel_h,
    const int kernel_w,
    const int pad_h,
    const int pad_w,
    const int stride_h,
    const int stride_w,
    const int dilation_h,
    const int dilation_w,
    float *data_col
)
float im2col_get_pixel(
    float *im,
    int height,
    int width,
    int channels,
    int row,
    int col,
    int channel,
    int pad
)
void im2col_gpu_ext(
    const float *data_im,
    const int channels,
    const int height,
    const int width,
    const int kernel_h,
    const int kernel_w,
    const int pad_h,
    const int pad_w,
    const int stride_h,
    const int stride_w,
    const int dilation_h,
    const int dilation_w,
    float *data_col
)
void im2col_ongpu(
    float *im,
    int channels,
    int height,
    int width,
    int ksize,
    int stride,
    int pad,
    float *data_col
)
void repack_input_gpu(
    float *input,
    float *re_packed_input,
    int w,
    int h,
    int c
)
void repack_input_gpu_bin(
    float *input,
    uint32_t *re_packed_input_bin,
    int w,
    int h,
    int c
)
void transpose_bin_gpu(
    unsigned char *A,
    unsigned char *B,
    const int n,
    const int m,
    const int lda,
    const int ldb,
    const int block_size
)
void transpose_uint32_gpu(
    uint32_t *src,
    uint32_t *dst,
    int src_h,
    int src_w,
    int src_align,
    int dst_align
)