Darknet/YOLO v3.0-208-g0b6f60f-dirty
Object Detection Framework

blas_kernels.cu File Reference

Functions

__global__ void accumulate_kernel (float *x, int n, int groups, float *sum)
 
void activate_and_mult (float *a1, float *a2, size_t size, ACTIVATION a, float *dst)
 
__global__ void activate_and_mult_kernel (float *a1, float *a2, size_t size, ACTIVATION a, float *dst)
 
void adam_gpu (int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
 
__global__ void adam_kernel (int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
 
void adam_update_gpu (float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
 
void add_3_arrays_activate (float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst)
 
__global__ void add_3_arrays_activate_kernel (float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst)
 
void add_bias_gpu (float *output, float *biases, int batch, int filters, int spatial)
 
__global__ void add_bias_kernel (float *output, float *biases, int batch, int filters, int spatial, int current_size)
 
__global__ void axpy_kernel (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
void axpy_ongpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
void axpy_ongpu_offset (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
void backward_bias_gpu (float *bias_updates, float *delta, int batch, int n, int size)
 
__global__ void backward_bias_kernel (float *bias_updates, float *delta, int batch, int n, int size)
 
void backward_implicit_gpu (int batch, int nweights, float *weight_updates_gpu, float *delta_gpu)
 
__global__ void backward_implicit_kernel (int size, int batch, int nweights, float *weight_updates_gpu, float *delta_gpu)
 
void backward_sam_gpu (float *in_w_h_c_delta, int size, int channel_size, float *in_scales_c, float *out_from_delta, float *in_from_output, float *out_state_delta)
 
__global__ void backward_sam_kernel (float *in_w_h_c_delta, int size, int channel_size, float *in_scales_c, float *out_from_delta, float *in_from_output, float *out_state_delta)
 
void backward_scale_channels_gpu (float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh, float *in_scales_c, float *out_from_delta, float *in_from_output, float *out_state_delta)
 
__global__ void backward_scale_channels_kernel (float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh, float *in_scales_c, float *out_from_delta, float *in_from_output, float *out_state_delta)
 
void backward_scale_gpu (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
 
__global__ void backward_scale_kernel (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
 
void backward_shortcut_multilayer_gpu (int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization)
 
__global__ void backward_shortcut_multilayer_kernel (int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization)
 
void compare_2_arrays_gpu (float *one, float *two, int size)
 
__global__ void compare_2_arrays_kernel (float *one, float *two, int size)
 
__global__ void const_kernel (int N, float ALPHA, float *X, int INCX)
 
void const_ongpu (int N, float ALPHA, float *X, int INCX)
 
__global__ void constrain_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void constrain_min_max_kernel (int N, float MIN, float MAX, float *X, int INCX)
 
void constrain_min_max_ongpu (int N, float MIN, float MAX, float *X, int INCX)
 
void constrain_ongpu (int N, float ALPHA, float *X, int INCX)
 
__global__ void constrain_weight_updates_kernel (int N, float coef, float *weights_gpu, float *weight_updates_gpu)
 
void constrain_weight_updates_ongpu (int N, float coef, float *weights_gpu, float *weight_updates_gpu)
 
void coord_conv_gpu (float *dst, int size, int w, int h, int chan, int b, int type)
 
__global__ void coord_conv_kernel (float *dst, int w, int h, int chan, int batch, int type)
 
__global__ void copy_kernel (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
void copy_ongpu (int N, float *X, int INCX, float *Y, int INCY)
 
void copy_ongpu_offset (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
void expand_array_gpu (const float *src_gpu, float *dst_gpu, int size, int groups)
 
__global__ void expand_array_kernel (const float *src_gpu, float *dst_gpu, int current_size, int groups)
 
void fast_mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
__global__ void fast_mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
void fast_mean_gpu (float *x, int batch, int filters, int spatial, float *mean)
 
__global__ void fast_mean_kernel (float *x, int batch, int filters, int spatial, float *mean)
 
void fast_v_cbn_gpu (const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
 
__global__ void fast_v_cbn_kernel (const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
 
void fast_variance_delta_gpu (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
__global__ void fast_variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
void fast_variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
__global__ void fast_variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
__global__ void fill_kernel (int N, float ALPHA, float *X, int INCX)
 
void fill_ongpu (int N, float ALPHA, float *X, int INCX)
 
void fix_nan_and_inf (float *input, size_t size)
 
__global__ void fix_nan_and_inf_kernel (float *input, size_t size)
 
__global__ void flatten_kernel (int N, float *x, int spatial, int layers, int batch, int forward, float *out)
 
void flatten_ongpu (float *x, int spatial, int layers, int batch, int forward, float *out)
 
void forward_implicit_gpu (int batch, int nweights, float *weight_gpu, float *output_gpu)
 
__global__ void forward_implicit_kernel (int size, int batch, int nweights, float *weight_gpu, float *output_gpu)
 
__device__ float grad_lrelu (float src)
 
__device__ float grad_relu (float src)
 
void gradient_centralization_gpu (int w, int h, int c, int f, float *in)
 
__global__ void gradient_centralization_kernel (int filters, int f_size, float *in)
 
void input_shortcut_gpu (float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 
__global__ void input_shortcut_kernel (float *in, int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 
__global__ void inverse_variance_kernel (int size, float *src, float *dst, float epsilon)
 
void inverse_variance_ongpu (int size, float *src, float *dst, float epsilon)
 
int is_nan_or_inf (float *input, size_t size)
 
__global__ void is_nan_or_inf_kernel (float *input, size_t size, int *pinned_return)
 
void l2_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void l2_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
__device__ float lrelu (float src)
 
void mask_gpu_new_api (int N, float *X, float mask_num, float *mask, float val)
 
__global__ void mask_kernel (int n, float *x, float mask_num, float *mask)
 
__global__ void mask_kernel_new_api (int n, float *x, float mask_num, float *mask, float val)
 
void mask_ongpu (int N, float *X, float mask_num, float *mask)
 
void mean_array_gpu (float *src, int size, float alpha, float *avg)
 
__global__ void mean_array_kernel (float *src, int size, float alpha, float *avg)
 
void mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
__global__ void mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
void mean_gpu (float *x, int batch, int filters, int spatial, float *mean)
 
__global__ void mean_kernel (float *x, int batch, int filters, int spatial, float *mean)
 
void memcpy_ongpu (void *dst, void *src, int size_bytes)
 
__global__ void mul_kernel (int N, float *X, int INCX, float *Y, int INCY)
 
void mul_ongpu (int N, float *X, int INCX, float *Y, int INCY)
 
void mult_add_into_gpu (int num, float *a, float *b, float *c)
 
__global__ void mult_add_into_kernel (int n, float *a, float *b, float *c)
 
void mult_inverse_array_gpu (const float *src_gpu, float *dst_gpu, int size, float eps, float divider, float clip, float abs_add)
 
__global__ void mult_inverse_array_kernel (const float *src_gpu, float *dst_gpu, int size, const float eps, float divider, const float clip, const float abs_add)
 
void normalize_delta_gpu (float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
 
__global__ void normalize_delta_kernel (int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
 
void normalize_gpu (float *x, float *mean, float *variance, int batch, int filters, int spatial)
 
__global__ void normalize_kernel (int N, float *x, float *mean, float *variance, int batch, int filters, int spatial)
 
void normalize_scale_bias_gpu (float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon)
 
__global__ void normalize_scale_bias_kernel (int N, float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon)
 
void P_constrastive_f_det_gpu (int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size)
 
__global__ void P_constrastive_f_det_kernel (int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size)
 
__global__ void pow_kernel (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
void pow_ongpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
void reduce_and_expand_array_gpu (const float *src_gpu, float *dst_gpu, int size, int groups)
 
__global__ void reduce_and_expand_array_kernel (const float *src_gpu, float *dst_gpu, int current_size, int groups)
 
__device__ float relu (float src)
 
__global__ void reorg_kernel (int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
 
void reorg_ongpu (float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
 
void reset_nan_and_inf (float *input, size_t size)
 
__global__ void reset_nan_and_inf_kernel (float *input, size_t size)
 
void rotate_weights_gpu (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int reverse)
 
__global__ void rotate_weights_kernel (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int reverse)
 
void sam_gpu (float *in_w_h_c, int size, int channel_size, float *scales_c, float *out)
 
__global__ void sam_kernel (float *in_w_h_c, int size, int channel_size, float *scales_c, float *out)
 
__global__ void scal_add_kernel (int N, float ALPHA, float BETA, float *X, int INCX)
 
void scal_add_ongpu (int N, float ALPHA, float BETA, float *X, int INCX)
 
__global__ void scal_kernel (int N, float ALPHA, float *X, int INCX)
 
void scal_ongpu (int N, float ALPHA, float *X, int INCX)
 
void scale_bias_gpu (float *output, float *scale, int batch, int filters, int spatial)
 
__global__ void scale_bias_kernel (float *output, float *scale, int batch, int filters, int spatial, int current_size)
 
void scale_channels_gpu (float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out)
 
__global__ void scale_channels_kernel (float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out)
 
void shortcut_gpu (int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 
__global__ void shortcut_kernel (int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 
void shortcut_multilayer_gpu (int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
 
__global__ void shortcut_multilayer_kernel (int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
 
__global__ void shortcut_singlelayer_simple_kernel (int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
 
__global__ void simple_copy_kernel (int size, float *src, float *dst)
 
void simple_copy_ongpu (int size, float *src, float *dst)
 
__global__ void simple_input_shortcut_kernel (float *in, int size, float *add, float *out)
 
void smooth_l1_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void smooth_l1_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void smooth_rotate_weights_gpu (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse)
 
__global__ void smooth_rotate_weights_kernel (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int angle, int reverse)
 
__device__ void softmax_device (int n, float *input, float temp, float *output)
 
__device__ void softmax_device_new_api (float *input, int n, float temp, int stride, float *output)
 
void softmax_gpu (float *input, int n, int offset, int groups, float temp, float *output)
 
void softmax_gpu_new_api (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 
__global__ void softmax_kernel (int n, int offset, int batch, float *input, float temp, float *output)
 
__global__ void softmax_kernel_new_api (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 
void softmax_tree_gpu (float *input, int spatial, int batch, int stride, float temp, float *output, Darknet::Tree hier)
 
__global__ void softmax_tree_kernel (float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
 
void softmax_x_ent_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void softmax_x_ent_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void stretch_sway_flip_weights_gpu (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse)
 
__global__ void stretch_sway_flip_weights_kernel (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, float angle, int reverse)
 
void stretch_weights_gpu (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, float scale, int reverse)
 
__global__ void stretch_weights_kernel (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, float scale, int reverse)
 
void sum_of_mults (float *a1, float *a2, float *b1, float *b2, size_t size, float *dst)
 
__global__ void sum_of_mults_kernel (float *a1, float *a2, float *b1, float *b2, size_t size, float *dst)
 
__global__ void supp_kernel (int N, float ALPHA, float *X, int INCX)
 
void supp_ongpu (int N, float ALPHA, float *X, int INCX)
 
void sway_and_flip_weights_gpu (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse)
 
__global__ void sway_and_flip_weights_kernel (const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int angle, int reverse)
 
void upsample_gpu (float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
 
__global__ void upsample_kernel (size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
 
__global__ void variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
void variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
__global__ void variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
__inline__ __device__ float warpAllReduceSum (float val)
 
void weighted_delta_gpu (float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
 
__global__ void weighted_delta_kernel (int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc)
 
void weighted_sum_gpu (float *a, float *b, float *s, int num, float *c)
 
__global__ void weighted_sum_kernel (int n, float *a, float *b, float *s, float *c)
 

Function Documentation

◆ accumulate_kernel()

__global__ void accumulate_kernel ( float *  x,
int  n,
int  groups,
float *  sum 
)

◆ activate_and_mult()

void activate_and_mult ( float *  a1,
float *  a2,
size_t  size,
ACTIVATION  a,
float *  dst 
)
Here is the call graph for this function:

◆ activate_and_mult_kernel()

__global__ void activate_and_mult_kernel ( float *  a1,
float *  a2,
size_t  size,
ACTIVATION  a,
float *  dst 
)
Here is the caller graph for this function:

◆ adam_gpu()

void adam_gpu ( int  n,
float *  x,
float *  m,
float *  v,
float  B1,
float  B2,
float  rate,
float  eps,
int  t 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ adam_kernel()

__global__ void adam_kernel ( int  N,
float *  x,
float *  m,
float *  v,
float  B1,
float  B2,
float  rate,
float  eps,
int  t 
)
Here is the caller graph for this function:

◆ adam_update_gpu()

void adam_update_gpu ( float *  w,
float *  d,
float *  m,
float *  v,
float  B1,
float  B2,
float  eps,
float  decay,
float  rate,
int  n,
int  batch,
int  t 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ add_3_arrays_activate()

void add_3_arrays_activate ( float *  a1,
float *  a2,
float *  a3,
size_t  size,
ACTIVATION  a,
float *  dst 
)
Here is the call graph for this function:

◆ add_3_arrays_activate_kernel()

__global__ void add_3_arrays_activate_kernel ( float *  a1,
float *  a2,
float *  a3,
size_t  size,
ACTIVATION  a,
float *  dst 
)
Here is the caller graph for this function:

◆ add_bias_gpu()

void add_bias_gpu ( float *  output,
float *  biases,
int  batch,
int  filters,
int  spatial 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ add_bias_kernel()

__global__ void add_bias_kernel ( float *  output,
float *  biases,
int  batch,
int  filters,
int  spatial,
int  current_size 
)
Here is the caller graph for this function:

◆ axpy_kernel()

__global__ void axpy_kernel ( int  N,
float  ALPHA,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)
Here is the caller graph for this function:

◆ axpy_ongpu()

void axpy_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpy_ongpu_offset()

void axpy_ongpu_offset ( int  N,
float  ALPHA,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_bias_gpu()

void backward_bias_gpu ( float *  bias_updates,
float *  delta,
int  batch,
int  n,
int  size 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_bias_kernel()

__global__ void backward_bias_kernel ( float *  bias_updates,
float *  delta,
int  batch,
int  n,
int  size 
)
Here is the caller graph for this function:

◆ backward_implicit_gpu()

void backward_implicit_gpu ( int  batch,
int  nweights,
float *  weight_updates_gpu,
float *  delta_gpu 
)
Here is the call graph for this function:

◆ backward_implicit_kernel()

__global__ void backward_implicit_kernel ( int  size,
int  batch,
int  nweights,
float *  weight_updates_gpu,
float *  delta_gpu 
)
Here is the caller graph for this function:

◆ backward_sam_gpu()

void backward_sam_gpu ( float *  in_w_h_c_delta,
int  size,
int  channel_size,
float *  in_scales_c,
float *  out_from_delta,
float *  in_from_output,
float *  out_state_delta 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_sam_kernel()

__global__ void backward_sam_kernel ( float *  in_w_h_c_delta,
int  size,
int  channel_size,
float *  in_scales_c,
float *  out_from_delta,
float *  in_from_output,
float *  out_state_delta 
)
Here is the caller graph for this function:

◆ backward_scale_channels_gpu()

void backward_scale_channels_gpu ( float *  in_w_h_c_delta,
int  size,
int  channel_size,
int  batch_size,
int  scale_wh,
float *  in_scales_c,
float *  out_from_delta,
float *  in_from_output,
float *  out_state_delta 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_scale_channels_kernel()

__global__ void backward_scale_channels_kernel ( float *  in_w_h_c_delta,
int  size,
int  channel_size,
int  batch_size,
int  scale_wh,
float *  in_scales_c,
float *  out_from_delta,
float *  in_from_output,
float *  out_state_delta 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_scale_gpu()

void backward_scale_gpu ( float *  x_norm,
float *  delta,
int  batch,
int  n,
int  size,
float *  scale_updates 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_scale_kernel()

__global__ void backward_scale_kernel ( float *  x_norm,
float *  delta,
int  batch,
int  n,
int  size,
float *  scale_updates 
)
Here is the caller graph for this function:

◆ backward_shortcut_multilayer_gpu()

void backward_shortcut_multilayer_gpu ( int  src_outputs,
int  batch,
int  n,
int *  outputs_of_layers_gpu,
float **  layers_delta_gpu,
float *  delta_out,
float *  delta_in,
float *  weights_gpu,
float *  weight_updates_gpu,
int  nweights,
float *  in,
float **  layers_output_gpu,
WEIGHTS_NORMALIZATION_T  weights_normalization 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ backward_shortcut_multilayer_kernel()

__global__ void backward_shortcut_multilayer_kernel ( int  size,
int  src_outputs,
int  batch,
int  n,
int *  outputs_of_layers_gpu,
float **  layers_delta_gpu,
float *  delta_out,
float *  delta_in,
float *  weights_gpu,
float *  weight_updates_gpu,
int  nweights,
float *  in,
float **  layers_output_gpu,
WEIGHTS_NORMALIZATION_T  weights_normalization 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ compare_2_arrays_gpu()

void compare_2_arrays_gpu ( float *  one,
float *  two,
int  size 
)
Here is the call graph for this function:

◆ compare_2_arrays_kernel()

__global__ void compare_2_arrays_kernel ( float *  one,
float *  two,
int  size 
)
Here is the caller graph for this function:

◆ const_kernel()

__global__ void const_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ const_ongpu()

void const_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the call graph for this function:

◆ constrain_kernel()

__global__ void constrain_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ constrain_min_max_kernel()

__global__ void constrain_min_max_kernel ( int  N,
float  MIN,
float  MAX,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ constrain_min_max_ongpu()

void constrain_min_max_ongpu ( int  N,
float  MIN,
float  MAX,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ constrain_ongpu()

void constrain_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ constrain_weight_updates_kernel()

__global__ void constrain_weight_updates_kernel ( int  N,
float  coef,
float *  weights_gpu,
float *  weight_updates_gpu 
)
Here is the caller graph for this function:

◆ constrain_weight_updates_ongpu()

void constrain_weight_updates_ongpu ( int  N,
float  coef,
float *  weights_gpu,
float *  weight_updates_gpu 
)
Here is the call graph for this function:

◆ coord_conv_gpu()

void coord_conv_gpu ( float *  dst,
int  size,
int  w,
int  h,
int  chan,
int  b,
int  type 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ coord_conv_kernel()

__global__ void coord_conv_kernel ( float *  dst,
int  w,
int  h,
int  chan,
int  batch,
int  type 
)
Here is the caller graph for this function:

◆ copy_kernel()

__global__ void copy_kernel ( int  N,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)
Here is the caller graph for this function:

◆ copy_ongpu()

void copy_ongpu ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ copy_ongpu_offset()

void copy_ongpu_offset ( int  N,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ expand_array_gpu()

void expand_array_gpu ( const float *  src_gpu,
float *  dst_gpu,
int  size,
int  groups 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ expand_array_kernel()

__global__ void expand_array_kernel ( const float *  src_gpu,
float *  dst_gpu,
int  current_size,
int  groups 
)
Here is the caller graph for this function:

◆ fast_mean_delta_gpu()

void fast_mean_delta_gpu ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_mean_delta_kernel()

__global__ void fast_mean_delta_kernel ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)
Here is the caller graph for this function:

◆ fast_mean_gpu()

void fast_mean_gpu ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_mean_kernel()

__global__ void fast_mean_kernel ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)
Here is the caller graph for this function:

◆ fast_v_cbn_gpu()

void fast_v_cbn_gpu ( const float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
int  minibatch_index,
int  max_minibatch_index,
float *  m_avg,
float *  v_avg,
float *  variance,
const float  alpha,
float *  rolling_mean_gpu,
float *  rolling_variance_gpu,
int  inverse_variance,
float  epsilon 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_v_cbn_kernel()

__global__ void fast_v_cbn_kernel ( const float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
int  minibatch_index,
int  max_minibatch_index,
float *  m_avg,
float *  v_avg,
float *  variance,
const float  alpha,
float *  rolling_mean_gpu,
float *  rolling_variance_gpu,
int  inverse_variance,
float  epsilon 
)
Here is the caller graph for this function:

◆ fast_variance_delta_gpu()

void fast_variance_delta_gpu ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_variance_delta_kernel()

__global__ void fast_variance_delta_kernel ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)
Here is the caller graph for this function:

◆ fast_variance_gpu()

void fast_variance_gpu ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fast_variance_kernel()

__global__ void fast_variance_kernel ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)
Here is the caller graph for this function:

◆ fill_kernel()

__global__ void fill_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ fill_ongpu()

void fill_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fix_nan_and_inf()

void fix_nan_and_inf ( float *  input,
size_t  size 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fix_nan_and_inf_kernel()

__global__ void fix_nan_and_inf_kernel ( float *  input,
size_t  size 
)
Here is the caller graph for this function:

◆ flatten_kernel()

__global__ void flatten_kernel ( int  N,
float *  x,
int  spatial,
int  layers,
int  batch,
int  forward,
float *  out 
)
Here is the caller graph for this function:

◆ flatten_ongpu()

void flatten_ongpu ( float *  x,
int  spatial,
int  layers,
int  batch,
int  forward,
float *  out 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ forward_implicit_gpu()

void forward_implicit_gpu ( int  batch,
int  nweights,
float *  weight_gpu,
float *  output_gpu 
)
Here is the call graph for this function:

◆ forward_implicit_kernel()

__global__ void forward_implicit_kernel ( int  size,
int  batch,
int  nweights,
float *  weight_gpu,
float *  output_gpu 
)
Here is the caller graph for this function:

◆ grad_lrelu()

__device__ float grad_lrelu ( float  src)

◆ grad_relu()

__device__ float grad_relu ( float  src)

◆ gradient_centralization_gpu()

void gradient_centralization_gpu ( int  w,
int  h,
int  c,
int  f,
float *  in 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gradient_centralization_kernel()

__global__ void gradient_centralization_kernel ( int  filters,
int  f_size,
float *  in 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ input_shortcut_gpu()

void input_shortcut_gpu(float *in, int batch,
                        int w1, int h1, int c1, float *add,
                        int w2, int h2, int c2, float *out)
Here is the call graph for this function:

◆ input_shortcut_kernel()

__global__ void input_shortcut_kernel(float *in, int size,
                                      int minw, int minh, int minc,
                                      int stride, int sample, int batch,
                                      int w1, int h1, int c1, float *add,
                                      int w2, int h2, int c2, float *out)
Here is the call graph for this function:

◆ inverse_variance_kernel()

__global__ void inverse_variance_kernel ( int  size,
float *  src,
float *  dst,
float  epsilon 
)
Here is the caller graph for this function:

◆ inverse_variance_ongpu()

void inverse_variance_ongpu ( int  size,
float *  src,
float *  dst,
float  epsilon 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_nan_or_inf()

int is_nan_or_inf ( float *  input,
size_t  size 
)
Here is the call graph for this function:

◆ is_nan_or_inf_kernel()

__global__ void is_nan_or_inf_kernel ( float *  input,
size_t  size,
int *  pinned_return 
)
Here is the caller graph for this function:

◆ l2_gpu()

void l2_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ l2_kernel()

__global__ void l2_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the caller graph for this function:

◆ lrelu()

__device__ float lrelu ( float  src)
Here is the caller graph for this function:

◆ mask_gpu_new_api()

void mask_gpu_new_api ( int  N,
float *  X,
float  mask_num,
float *  mask,
float  val 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mask_kernel()

__global__ void mask_kernel ( int  n,
float *  x,
float  mask_num,
float *  mask 
)
Here is the caller graph for this function:

◆ mask_kernel_new_api()

__global__ void mask_kernel_new_api ( int  n,
float *  x,
float  mask_num,
float *  mask,
float  val 
)
Here is the caller graph for this function:

◆ mask_ongpu()

void mask_ongpu ( int  N,
float *  X,
float  mask_num,
float *  mask 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mean_array_gpu()

void mean_array_gpu ( float *  src,
int  size,
float  alpha,
float *  avg 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mean_array_kernel()

__global__ void mean_array_kernel ( float *  src,
int  size,
float  alpha,
float *  avg 
)
Here is the caller graph for this function:

◆ mean_delta_gpu()

void mean_delta_gpu ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)
Here is the call graph for this function:

◆ mean_delta_kernel()

__global__ void mean_delta_kernel ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)
Here is the caller graph for this function:

◆ mean_gpu()

void mean_gpu ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)
Here is the call graph for this function:

◆ mean_kernel()

__global__ void mean_kernel ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)
Here is the caller graph for this function:

◆ memcpy_ongpu()

void memcpy_ongpu ( void *  dst,
void *  src,
int  size_bytes 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mul_kernel()

__global__ void mul_kernel ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the caller graph for this function:

◆ mul_ongpu()

void mul_ongpu ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mult_add_into_gpu()

void mult_add_into_gpu ( int  num,
float *  a,
float *  b,
float *  c 
)
Here is the call graph for this function:

◆ mult_add_into_kernel()

__global__ void mult_add_into_kernel ( int  n,
float *  a,
float *  b,
float *  c 
)
Here is the caller graph for this function:

◆ mult_inverse_array_gpu()

void mult_inverse_array_gpu ( const float *  src_gpu,
float *  dst_gpu,
int  size,
float  eps,
float  divider,
float  clip,
float  abs_add 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ mult_inverse_array_kernel()

__global__ void mult_inverse_array_kernel ( const float *  src_gpu,
float *  dst_gpu,
int  size,
const float  eps,
float  divider,
const float  clip,
const float  abs_add 
)
Here is the caller graph for this function:

◆ normalize_delta_gpu()

void normalize_delta_gpu(float *x, float *mean, float *variance,
                         float *mean_delta, float *variance_delta,
                         int batch, int filters, int spatial,
                         float *delta)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ normalize_delta_kernel()

__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance,
                                       float *mean_delta, float *variance_delta,
                                       int batch, int filters, int spatial,
                                       float *delta)
Here is the caller graph for this function:

◆ normalize_gpu()

void normalize_gpu ( float *  x,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ normalize_kernel()

__global__ void normalize_kernel ( int  N,
float *  x,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial 
)
Here is the caller graph for this function:

◆ normalize_scale_bias_gpu()

void normalize_scale_bias_gpu(float *x, float *mean, float *variance,
                              float *scales, float *biases,
                              int batch, int filters, int spatial,
                              int inverse_variance, float epsilon)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ normalize_scale_bias_kernel()

__global__ void normalize_scale_bias_kernel(int N, float *x, float *mean, float *variance,
                                            float *scales, float *biases,
                                            int batch, int filters, int spatial,
                                            int inverse_variance, float epsilon)
Here is the caller graph for this function:

◆ P_constrastive_f_det_gpu()

void P_constrastive_f_det_gpu ( int *  labels,
unsigned int  feature_size,
float  temperature,
contrastive_params contrast_p,
const int  contrast_p_size 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ P_constrastive_f_det_kernel()

__global__ void P_constrastive_f_det_kernel ( int *  labels,
unsigned int  feature_size,
float  temperature,
contrastive_params contrast_p,
const int  contrast_p_size 
)
Here is the caller graph for this function:

◆ pow_kernel()

__global__ void pow_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the caller graph for this function:

◆ pow_ongpu()

void pow_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)
Here is the call graph for this function:

◆ reduce_and_expand_array_gpu()

void reduce_and_expand_array_gpu ( const float *  src_gpu,
float *  dst_gpu,
int  size,
int  groups 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduce_and_expand_array_kernel()

__global__ void reduce_and_expand_array_kernel ( const float *  src_gpu,
float *  dst_gpu,
int  current_size,
int  groups 
)
Here is the caller graph for this function:

◆ relu()

__device__ float relu ( float  src)

◆ reorg_kernel()

__global__ void reorg_kernel(int N, float *x,
                             int w, int h, int c, int batch,
                             int stride, int forward, float *out)
Here is the caller graph for this function:

◆ reorg_ongpu()

void reorg_ongpu ( float *  x,
int  w,
int  h,
int  c,
int  batch,
int  stride,
int  forward,
float *  out 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_nan_and_inf()

void reset_nan_and_inf ( float *  input,
size_t  size 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reset_nan_and_inf_kernel()

__global__ void reset_nan_and_inf_kernel ( float *  input,
size_t  size 
)
Here is the caller graph for this function:

◆ rotate_weights_gpu()

void rotate_weights_gpu ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  size,
int  reverse 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ rotate_weights_kernel()

__global__ void rotate_weights_kernel ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  kernel_size,
int  reverse 
)
Here is the caller graph for this function:

◆ sam_gpu()

void sam_gpu ( float *  in_w_h_c,
int  size,
int  channel_size,
float *  scales_c,
float *  out 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sam_kernel()

__global__ void sam_kernel ( float *  in_w_h_c,
int  size,
int  channel_size,
float *  scales_c,
float *  out 
)
Here is the caller graph for this function:

◆ scal_add_kernel()

__global__ void scal_add_kernel ( int  N,
float  ALPHA,
float  BETA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ scal_add_ongpu()

void scal_add_ongpu ( int  N,
float  ALPHA,
float  BETA,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ scal_kernel()

__global__ void scal_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ scal_ongpu()

void scal_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ scale_bias_gpu()

void scale_bias_gpu ( float *  output,
float *  scale,
int  batch,
int  filters,
int  spatial 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ scale_bias_kernel()

__global__ void scale_bias_kernel ( float *  output,
float *  scale,
int  batch,
int  filters,
int  spatial,
int  current_size 
)
Here is the caller graph for this function:

◆ scale_channels_gpu()

void scale_channels_gpu ( float *  in_w_h_c,
int  size,
int  channel_size,
int  batch_size,
int  scale_wh,
float *  scales_c,
float *  out 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ scale_channels_kernel()

__global__ void scale_channels_kernel ( float *  in_w_h_c,
int  size,
int  channel_size,
int  batch_size,
int  scale_wh,
float *  scales_c,
float *  out 
)
Here is the caller graph for this function:

◆ shortcut_gpu()

void shortcut_gpu(int batch,
                  int w1, int h1, int c1, float *add,
                  int w2, int h2, int c2, float *out)
Here is the call graph for this function:

◆ shortcut_kernel()

__global__ void shortcut_kernel(int size,
                                int minw, int minh, int minc,
                                int stride, int sample, int batch,
                                int w1, int h1, int c1, float *add,
                                int w2, int h2, int c2, float *out)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shortcut_multilayer_gpu()

void shortcut_multilayer_gpu(int src_outputs, int batch, int n,
                             int *outputs_of_layers_gpu, float **layers_output_gpu,
                             float *out, float *in,
                             float *weights_gpu, int nweights,
                             WEIGHTS_NORMALIZATION_T weights_normalization)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shortcut_multilayer_kernel()

__global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n,
                                           int *outputs_of_layers_gpu, float **layers_output_gpu,
                                           float *out, float *in,
                                           float *weights_gpu, int nweights,
                                           WEIGHTS_NORMALIZATION_T weights_normalization)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ shortcut_singlelayer_simple_kernel()

__global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n,
                                                   int *outputs_of_layers_gpu, float **layers_output_gpu,
                                                   float *out, float *in,
                                                   float *weights_gpu, int nweights,
                                                   WEIGHTS_NORMALIZATION_T weights_normalization)
Here is the caller graph for this function:

◆ simple_copy_kernel()

__global__ void simple_copy_kernel ( int  size,
float *  src,
float *  dst 
)
Here is the caller graph for this function:

◆ simple_copy_ongpu()

void simple_copy_ongpu ( int  size,
float *  src,
float *  dst 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ simple_input_shortcut_kernel()

__global__ void simple_input_shortcut_kernel ( float *  in,
int  size,
float *  add,
float *  out 
)
Here is the caller graph for this function:

◆ smooth_l1_gpu()

void smooth_l1_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ smooth_l1_kernel()

__global__ void smooth_l1_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the caller graph for this function:

◆ smooth_rotate_weights_gpu()

void smooth_rotate_weights_gpu ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  size,
int  angle,
int  reverse 
)
Here is the call graph for this function:

◆ smooth_rotate_weights_kernel()

__global__ void smooth_rotate_weights_kernel ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  kernel_size,
int  angle,
int  reverse 
)
Here is the caller graph for this function:

◆ softmax_device()

__device__ void softmax_device ( int  n,
float *  input,
float  temp,
float *  output 
)
Here is the caller graph for this function:

◆ softmax_device_new_api()

__device__ void softmax_device_new_api ( float *  input,
int  n,
float  temp,
int  stride,
float *  output 
)
Here is the caller graph for this function:

◆ softmax_gpu()

void softmax_gpu ( float *  input,
int  n,
int  offset,
int  groups,
float  temp,
float *  output 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_gpu_new_api()

void softmax_gpu_new_api(float *input, int n,
                         int batch, int batch_offset,
                         int groups, int group_offset,
                         int stride, float temp, float *output)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_kernel()

__global__ void softmax_kernel ( int  n,
int  offset,
int  batch,
float *  input,
float  temp,
float *  output 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_kernel_new_api()

__global__ void softmax_kernel_new_api(float *input, int n,
                                       int batch, int batch_offset,
                                       int groups, int group_offset,
                                       int stride, float temp, float *output)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_tree_gpu()

void softmax_tree_gpu ( float *  input,
int  spatial,
int  batch,
int  stride,
float  temp,
float *  output,
Darknet::Tree  hier 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_tree_kernel()

__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride,
                                    float temp, float *output,
                                    int groups, int *group_size, int *group_offset)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_x_ent_gpu()

void softmax_x_ent_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ softmax_x_ent_kernel()

__global__ void softmax_x_ent_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)
Here is the caller graph for this function:

◆ stretch_sway_flip_weights_gpu()

void stretch_sway_flip_weights_gpu ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  size,
int  angle,
int  reverse 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ stretch_sway_flip_weights_kernel()

__global__ void stretch_sway_flip_weights_kernel ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  kernel_size,
float  angle,
int  reverse 
)
Here is the caller graph for this function:

◆ stretch_weights_gpu()

void stretch_weights_gpu ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  size,
float  scale,
int  reverse 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ stretch_weights_kernel()

__global__ void stretch_weights_kernel ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  kernel_size,
float  scale,
int  reverse 
)
Here is the caller graph for this function:

◆ sum_of_mults()

void sum_of_mults ( float *  a1,
float *  a2,
float *  b1,
float *  b2,
size_t  size,
float *  dst 
)
Here is the call graph for this function:

◆ sum_of_mults_kernel()

__global__ void sum_of_mults_kernel ( float *  a1,
float *  a2,
float *  b1,
float *  b2,
size_t  size,
float *  dst 
)
Here is the caller graph for this function:

◆ supp_kernel()

__global__ void supp_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the caller graph for this function:

◆ supp_ongpu()

void supp_ongpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sway_and_flip_weights_gpu()

void sway_and_flip_weights_gpu ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  size,
int  angle,
int  reverse 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sway_and_flip_weights_kernel()

__global__ void sway_and_flip_weights_kernel ( const float *  src_weight_gpu,
float *  weight_deform_gpu,
int  nweights,
int  n,
int  kernel_size,
int  angle,
int  reverse 
)
Here is the caller graph for this function:

◆ upsample_gpu()

void upsample_gpu(float *in,
                  int w, int h, int c, int batch,
                  int stride, int forward, float scale, float *out)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ upsample_kernel()

__global__ void upsample_kernel(size_t N, float *x,
                                int w, int h, int c, int batch,
                                int stride, int forward, float scale, float *out)
Here is the caller graph for this function:

◆ variance_delta_kernel()

__global__ void variance_delta_kernel ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)

◆ variance_gpu()

void variance_gpu ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)
Here is the call graph for this function:

◆ variance_kernel()

__global__ void variance_kernel ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)
Here is the caller graph for this function:

◆ warpAllReduceSum()

__inline__ __device__ float warpAllReduceSum ( float  val)
Here is the caller graph for this function:

◆ weighted_delta_gpu()

void weighted_delta_gpu ( float *  a,
float *  b,
float *  s,
float *  da,
float *  db,
float *  ds,
int  num,
float *  dc 
)
Here is the call graph for this function:

◆ weighted_delta_kernel()

__global__ void weighted_delta_kernel ( int  n,
float *  a,
float *  b,
float *  s,
float *  da,
float *  db,
float *  ds,
float *  dc 
)
Here is the caller graph for this function:

◆ weighted_sum_gpu()

void weighted_sum_gpu ( float *  a,
float *  b,
float *  s,
int  num,
float *  c 
)
Here is the call graph for this function:

◆ weighted_sum_kernel()

__global__ void weighted_sum_kernel ( int  n,
float *  a,
float *  b,
float *  s,
float *  c 
)
Here is the caller graph for this function: