Darknet/YOLO v3.0-177-gfa2353b
Object Detection Framework
 
Loading...
Searching...
No Matches
gemm.cpp File Reference

General matrix multiplication (GEMM) More...

Include dependency graph for gemm.cpp:

Macros

#define PUT_IN_REGISTER   register
 
#define TILE_K   16
 
#define TILE_M   4
 
#define TILE_N   16
 

Functions

void activate_array_cpu_custom (float *x, const int n, const ACTIVATION a)
 
void binary_int32_printf (uint32_t src)
 
void binary_int64_printf (uint64_t src)
 
void convolution_2d (int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean)
 
void convolution_repacked (uint32_t *packed_input, uint32_t *packed_weights, float *output, int w, int h, int c, int n, int size, int pad, int new_lda, float *mean_arr)
 
static uint32_t fill_bit_int32 (char src)
 
static uint64_t fill_bit_int64 (char src)
 
void float_to_bit (float *src, unsigned char *dst, size_t size)
 
void forward_maxpool_layer_avx (float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, int pad, int stride, int batch)
 
void gemm (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_bin (int M, int N, int K, float ALPHA, char *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_cpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_gpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_nn (int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_nn_bin_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_nn_bin_transposed_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_nn_custom_bin_mean_transposed (int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_nn_fast (int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_nt (int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_ongpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A_gpu, int lda, float *B_gpu, int ldb, float BETA, float *C_gpu, int ldc)
 
void gemm_tn (int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_tt (int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float *C, int ldc)
 
static uint32_t get_bit_int32 (uint32_t const *const src, size_t index)
 
void im2col_cpu_custom (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
 
void im2col_cpu_custom_bin (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align)
 
void im2col_cpu_custom_transpose (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int ldb_align)
 
void init_cpu ()
 
int is_avx ()
 
int is_fma_avx2 ()
 
float * random_matrix (int rows, int cols)
 
void repack_input (float *input, float *re_packed_input, int w, int h, int c)
 
void test_gpu_accuracy (int TA, int TB, int m, int k, int n)
 
int test_gpu_blas ()
 
void time_gpu_random_matrix (int TA, int TB, int m, int k, int n)
 
void time_ongpu (int TA, int TB, int m, int k, int n)
 
void time_random_matrix (int TA, int TB, int m, int k, int n)
 
void transpose_bin (uint32_t *A, uint32_t *B, const int n, const int m, const int lda, const int ldb, const int block_size)
 
void transpose_block_SSE4x4 (float *A, float *B, const int n, const int m, const int lda, const int ldb, const int block_size)
 
static void transpose_scalar_block (float *A, float *B, const int lda, const int ldb, const int block_size)
 
void transpose_uint32 (uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align)
 
static unsigned char xnor (unsigned char a, unsigned char b)
 
static uint32_t xnor_int32 (uint32_t a, uint32_t b)
 
static uint64_t xnor_int64 (uint64_t a, uint64_t b)
 

Detailed Description

General matrix multiplication (GEMM)

Macro Definition Documentation

◆ PUT_IN_REGISTER

#define PUT_IN_REGISTER   register

◆ TILE_K

#define TILE_K   16

◆ TILE_M

#define TILE_M   4
Todo:
V3: It would be nice to know where this file came from, and to check whether updates are available.

◆ TILE_N

#define TILE_N   16

Function Documentation

◆ activate_array_cpu_custom()

void activate_array_cpu_custom ( float *  x,
const int  n,
const ACTIVATION  a 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ binary_int32_printf()

void binary_int32_printf ( uint32_t  src)

◆ binary_int64_printf()

void binary_int64_printf ( uint64_t  src)

◆ convolution_2d()

void convolution_2d ( int  w,
int  h,
int  ksize,
int  n,
int  c,
int  pad,
int  stride,
float *  weights,
float *  input,
float *  output,
float *  mean 
)

◆ convolution_repacked()

void convolution_repacked ( uint32_t *  packed_input,
uint32_t *  packed_weights,
float *  output,
int  w,
int  h,
int  c,
int  n,
int  size,
int  pad,
int  new_lda,
float *  mean_arr 
)

◆ fill_bit_int32()

static uint32_t fill_bit_int32 ( char  src)
inline static

◆ fill_bit_int64()

static uint64_t fill_bit_int64 ( char  src)
inline static

◆ float_to_bit()

void float_to_bit ( float *  src,
unsigned char *  dst,
size_t  size 
)
Here is the caller graph for this function:

◆ forward_maxpool_layer_avx()

void forward_maxpool_layer_avx ( float *  src,
float *  dst,
int *  indexes,
int  size,
int  w,
int  h,
int  out_w,
int  out_h,
int  c,
int  pad,
int  stride,
int  batch 
)
Here is the caller graph for this function:

◆ gemm()

void gemm ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_bin()

void gemm_bin ( int  M,
int  N,
int  K,
float  ALPHA,
char *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)

◆ gemm_cpu()

void gemm_cpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_gpu()

void gemm_gpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_nn()

void gemm_nn ( int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)
Here is the caller graph for this function:

◆ gemm_nn_bin_32bit_packed()

void gemm_nn_bin_32bit_packed ( int  M,
int  N,
int  K,
float  ALPHA,
uint32_t *  A,
int  lda,
uint32_t *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)

◆ gemm_nn_bin_transposed_32bit_packed()

void gemm_nn_bin_transposed_32bit_packed ( int  M,
int  N,
int  K,
float  ALPHA,
uint32_t *  A,
int  lda,
uint32_t *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)

◆ gemm_nn_custom_bin_mean_transposed()

void gemm_nn_custom_bin_mean_transposed ( int  M,
int  N,
int  K,
float  ALPHA_UNUSED,
unsigned char *  A,
int  lda,
unsigned char *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_nn_fast()

void gemm_nn_fast ( int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)
Here is the caller graph for this function:

◆ gemm_nt()

void gemm_nt ( int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)
Here is the caller graph for this function:

◆ gemm_ongpu()

void gemm_ongpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A_gpu,
int  lda,
float *  B_gpu,
int  ldb,
float  BETA,
float *  C_gpu,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_tn()

void gemm_tn ( int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)
Here is the caller graph for this function:

◆ gemm_tt()

void gemm_tt ( int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)
Here is the caller graph for this function:

◆ get_bit_int32()

static uint32_t get_bit_int32 ( uint32_t const *const  src,
size_t  index 
)
inline static

◆ im2col_cpu_custom()

void im2col_cpu_custom ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ im2col_cpu_custom_bin()

void im2col_cpu_custom_bin ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col,
int  bit_align 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ im2col_cpu_custom_transpose()

void im2col_cpu_custom_transpose ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col,
int  ldb_align 
)

◆ init_cpu()

void init_cpu ( )
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_avx()

int is_avx ( )
Here is the caller graph for this function:

◆ is_fma_avx2()

int is_fma_avx2 ( )
Here is the caller graph for this function:

◆ random_matrix()

float * random_matrix ( int  rows,
int  cols 
)
Here is the caller graph for this function:

◆ repack_input()

void repack_input ( float *  input,
float *  re_packed_input,
int  w,
int  h,
int  c 
)
Here is the caller graph for this function:

◆ test_gpu_accuracy()

void test_gpu_accuracy ( int  TA,
int  TB,
int  m,
int  k,
int  n 
)
Here is the call graph for this function:

◆ test_gpu_blas()

int test_gpu_blas ( )
Here is the call graph for this function:

◆ time_gpu_random_matrix()

void time_gpu_random_matrix ( int  TA,
int  TB,
int  m,
int  k,
int  n 
)
Here is the call graph for this function:

◆ time_ongpu()

void time_ongpu ( int  TA,
int  TB,
int  m,
int  k,
int  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ time_random_matrix()

void time_random_matrix ( int  TA,
int  TB,
int  m,
int  k,
int  n 
)
Here is the call graph for this function:

◆ transpose_bin()

void transpose_bin ( uint32_t *  A,
uint32_t *  B,
const int  n,
const int  m,
const int  lda,
const int  ldb,
const int  block_size 
)
Here is the call graph for this function:

◆ transpose_block_SSE4x4()

void transpose_block_SSE4x4 ( float *  A,
float *  B,
const int  n,
const int  m,
const int  lda,
const int  ldb,
const int  block_size 
)

◆ transpose_scalar_block()

static void transpose_scalar_block ( float *  A,
float *  B,
const int  lda,
const int  ldb,
const int  block_size 
)
inline static

◆ transpose_uint32()

void transpose_uint32 ( uint32_t *  src,
uint32_t *  dst,
int  src_h,
int  src_w,
int  src_align,
int  dst_align 
)
Here is the caller graph for this function:

◆ xnor()

static unsigned char xnor ( unsigned char  a,
unsigned char  b 
)
inline static
Here is the caller graph for this function:

◆ xnor_int32()

static uint32_t xnor_int32 ( uint32_t  a,
uint32_t  b 
)
inline static

◆ xnor_int64()

static uint64_t xnor_int64 ( uint64_t  a,
uint64_t  b 
)
inline static
Here is the caller graph for this function: