Darknet/YOLO v3.0-208-g0b6f60f-dirty
Object Detection Framework
 
Loading...
Searching...
No Matches
gemm.hpp File Reference

General matrix multiplication (GEMM). See the Detailed Description section below.

Include dependency graph for gemm.hpp:
This graph shows which files directly or indirectly include this file:

Functions

void activate_array_cpu_custom (float *x, const int n, const ACTIVATION a)
 
void convolution_2d (int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean)
 
void convolution_repacked (uint32_t *packed_input, uint32_t *packed_weights, float *output, int w, int h, int c, int n, int size, int pad, int new_lda, float *mean_arr)
 
void float_to_bit (float *src, unsigned char *dst, size_t size)
 
void forward_maxpool_layer_avx (float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, int pad, int stride, int batch)
 
void gemm (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_bin (int M, int N, int K, float ALPHA, char *A, int lda, float *B, int ldb, float *C, int ldc)
 
void gemm_cpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_gpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
 
void gemm_nn_bin_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_nn_bin_transposed_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_nn_custom_bin_mean_transposed (int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, float *C, int ldc, float *mean_arr)
 
void gemm_ongpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A_gpu, int lda, float *B_gpu, int ldb, float BETA, float *C_gpu, int ldc)
 
static unsigned char get_bit (unsigned char const *const src, size_t index)
 
void im2col_cpu_custom (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
 
void im2col_cpu_custom_align (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align)
 
void im2col_cpu_custom_bin (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align)
 
void im2col_cpu_custom_transpose (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int ldb_align)
 
int is_avx ()
 
int is_fma_avx2 ()
 
void repack_input (float *input, float *re_packed_input, int w, int h, int c)
 
static void set_bit (unsigned char *const dst, size_t index)
 
void transpose_32x32_bits_reversed_diagonale (uint32_t *A, uint32_t *B, int m, int n)
 
void transpose_bin (uint32_t *A, uint32_t *B, const int n, const int m, const int lda, const int ldb, const int block_size)
 
void transpose_block_SSE4x4 (float *A, float *B, const int n, const int m, const int lda, const int ldb, const int block_size)
 
void transpose_uint32 (uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align)
 

Detailed Description

General matrix multiplication (GEMM)

Function Documentation

◆ activate_array_cpu_custom()

void activate_array_cpu_custom ( float *  x,
const int  n,
const ACTIVATION  a 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ convolution_2d()

void convolution_2d ( int  w,
int  h,
int  ksize,
int  n,
int  c,
int  pad,
int  stride,
float *  weights,
float *  input,
float *  output,
float *  mean 
)

◆ convolution_repacked()

void convolution_repacked ( uint32_t *  packed_input,
uint32_t *  packed_weights,
float *  output,
int  w,
int  h,
int  c,
int  n,
int  size,
int  pad,
int  new_lda,
float *  mean_arr 
)

◆ float_to_bit()

void float_to_bit ( float *  src,
unsigned char *  dst,
size_t  size 
)
Here is the caller graph for this function:

◆ forward_maxpool_layer_avx()

void forward_maxpool_layer_avx ( float *  src,
float *  dst,
int *  indexes,
int  size,
int  w,
int  h,
int  out_w,
int  out_h,
int  c,
int  pad,
int  stride,
int  batch 
)
Here is the caller graph for this function:

◆ gemm()

void gemm ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_bin()

void gemm_bin ( int  M,
int  N,
int  K,
float  ALPHA,
char *  A,
int  lda,
float *  B,
int  ldb,
float *  C,
int  ldc 
)

◆ gemm_cpu()

void gemm_cpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_gpu()

void gemm_gpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A,
int  lda,
float *  B,
int  ldb,
float  BETA,
float *  C,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_nn_bin_32bit_packed()

void gemm_nn_bin_32bit_packed ( int  M,
int  N,
int  K,
float  ALPHA,
uint32_t *  A,
int  lda,
uint32_t *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)

◆ gemm_nn_bin_transposed_32bit_packed()

void gemm_nn_bin_transposed_32bit_packed ( int  M,
int  N,
int  K,
float  ALPHA,
uint32_t *  A,
int  lda,
uint32_t *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)

◆ gemm_nn_custom_bin_mean_transposed()

void gemm_nn_custom_bin_mean_transposed ( int  M,
int  N,
int  K,
float  ALPHA_UNUSED,
unsigned char *  A,
int  lda,
unsigned char *  B,
int  ldb,
float *  C,
int  ldc,
float *  mean_arr 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gemm_ongpu()

void gemm_ongpu ( int  TA,
int  TB,
int  M,
int  N,
int  K,
float  ALPHA,
float *  A_gpu,
int  lda,
float *  B_gpu,
int  ldb,
float  BETA,
float *  C_gpu,
int  ldc 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_bit()

static unsigned char get_bit ( unsigned char const *const  src,
size_t  index 
)
inline static
Here is the caller graph for this function:

◆ im2col_cpu_custom()

void im2col_cpu_custom ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ im2col_cpu_custom_align()

void im2col_cpu_custom_align ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col,
int  bit_align 
)

◆ im2col_cpu_custom_bin()

void im2col_cpu_custom_bin ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col,
int  bit_align 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ im2col_cpu_custom_transpose()

void im2col_cpu_custom_transpose ( float *  data_im,
int  channels,
int  height,
int  width,
int  ksize,
int  stride,
int  pad,
float *  data_col,
int  ldb_align 
)

◆ is_avx()

int is_avx ( )
Here is the caller graph for this function:

◆ is_fma_avx2()

int is_fma_avx2 ( )
Here is the caller graph for this function:

◆ repack_input()

void repack_input ( float *  input,
float *  re_packed_input,
int  w,
int  h,
int  c 
)
Here is the caller graph for this function:

◆ set_bit()

static void set_bit ( unsigned char *const  dst,
size_t  index 
)
inline static
Here is the caller graph for this function:

◆ transpose_32x32_bits_reversed_diagonale()

void transpose_32x32_bits_reversed_diagonale ( uint32_t *  A,
uint32_t *  B,
int  m,
int  n 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ transpose_bin()

void transpose_bin ( uint32_t *  A,
uint32_t *  B,
const int  n,
const int  m,
const int  lda,
const int  ldb,
const int  block_size 
)
Here is the call graph for this function:

◆ transpose_block_SSE4x4()

void transpose_block_SSE4x4 ( float *  A,
float *  B,
const int  n,
const int  m,
const int  lda,
const int  ldb,
const int  block_size 
)

◆ transpose_uint32()

void transpose_uint32 ( uint32_t *  src,
uint32_t *  dst,
int  src_h,
int  src_w,
int  src_align,
int  dst_align 
)
Here is the caller graph for this function: