Deep Neural Network Library (DNNL) 1.90.1
Performance library for Deep Learning
cnn_inference_f32.c

Annotated version: CNN f32 inference example
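This C API example builds a small AlexNet-style fragment for forward (inference) propagation in f32: a convolution followed by ReLU, local response normalization (LRN), and max pooling. User data is kept in plain nchw/oihw layouts, each primitive is allowed to pick its preferred memory format via dnnl_format_tag_any, and reorder primitives are inserted wherever the two layouts differ; the resulting net is then executed on a stream.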

/*******************************************************************************
* Copyright 2016-2019 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Required for posix_memalign
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>

#include "dnnl.h"

#include "example_macros.h"
#include "example_utils.h"
#define BATCH 8
#define IC 3
#define OC 96
#define CONV_IH 227
#define CONV_IW 227
#define CONV_OH 55
#define CONV_OW 55
#define CONV_STRIDE 4
#define CONV_PAD 0
#define POOL_OH 27
#define POOL_OW 27
#define POOL_STRIDE 2
#define POOL_PAD 0
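// The convolution consumes a {BATCH, IC, 227, 227} nchw tensor with an
// 11x11 kernel, stride 4, and no padding, so each spatial output dim is
// (227 - 11 + 2 * 0) / 4 + 1 = 55 (CONV_OH / CONV_OW). The 3x3 max pooling
// with stride 2 and no padding then gives (55 - 3) / 2 + 1 = 27
// (POOL_OH / POOL_OW).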
static size_t product(dnnl_dim_t *arr, size_t size) {
size_t prod = 1;
for (size_t i = 0; i < size; ++i)
prod *= arr[i];
return prod;
}
typedef struct {
int nargs;
dnnl_exec_arg_t *args;
} args_t;
static void prepare_arg_node(args_t *node, int nargs) {
node->args = (dnnl_exec_arg_t *)malloc(sizeof(dnnl_exec_arg_t) * nargs);
CHECK_NULL(node->args);
node->nargs = nargs;
}
static void free_arg_node(args_t *node) {
free(node->args);
}
static void set_arg(dnnl_exec_arg_t *arg, int arg_idx, dnnl_memory_t memory) {
arg->arg = arg_idx;
arg->memory = memory;
}
static void init_data_memory(uint32_t dim, const dnnl_dim_t *dims,
dnnl_format_tag_t user_tag, dnnl_data_type_t data_type,
dnnl_engine_t engine, float *data, dnnl_memory_t *memory) {
dnnl_memory_desc_t user_md;
CHECK(dnnl_memory_desc_init_by_tag(
&user_md, dim, dims, data_type, user_tag));
CHECK(dnnl_memory_create(memory, &user_md, engine, DNNL_MEMORY_ALLOCATE));
write_to_dnnl_memory(data, *memory);
}
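// prepare_reorder() compares the user's memory descriptor with the one the
// primitive prefers. If they differ, it allocates primitive-side memory and
// appends a reorder primitive (user -> prim or prim -> user, depending on
// dir_is_user_to_prim) to the net; otherwise it leaves both outputs NULL so
// the caller can use the user memory directly.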
dnnl_status_t prepare_reorder(dnnl_memory_t *user_memory, // in
const dnnl_memory_desc_t *prim_memory_md, // in
dnnl_engine_t prim_engine, // in: primitive's engine
int dir_is_user_to_prim, // in: user -> prim or prim -> user
dnnl_memory_t *prim_memory, // out: primitive's memory created
dnnl_primitive_t *reorder, // out: reorder primitive created
uint32_t *net_index, // primitive index in net (inc if reorder created)
dnnl_primitive_t *net, args_t *net_args) { // net params
const dnnl_memory_desc_t *user_memory_md;
dnnl_memory_get_memory_desc(*user_memory, &user_memory_md);
dnnl_engine_t user_mem_engine;
dnnl_memory_get_engine(*user_memory, &user_mem_engine);
if (!dnnl_memory_desc_equal(user_memory_md, prim_memory_md)) {
// memory_create(&p, m, NULL) means allocate memory
CHECK(dnnl_memory_create(prim_memory, prim_memory_md, prim_engine,
DNNL_MEMORY_ALLOCATE));
if (dir_is_user_to_prim) {
user_memory_md, user_mem_engine, prim_memory_md,
prim_engine, NULL));
} else {
prim_memory_md, prim_engine, user_memory_md,
user_mem_engine, NULL));
}
CHECK(dnnl_primitive_create(reorder, reorder_pd));
CHECK(dnnl_primitive_desc_destroy(reorder_pd));
net[*net_index] = *reorder;
prepare_arg_node(&net_args[*net_index], 2);
set_arg(&net_args[*net_index].args[0], DNNL_ARG_FROM,
dir_is_user_to_prim ? *user_memory : *prim_memory);
set_arg(&net_args[*net_index].args[1], DNNL_ARG_TO,
dir_is_user_to_prim ? *prim_memory : *user_memory);
(*net_index)++;
} else {
*prim_memory = NULL;
*reorder = NULL;
}
return dnnl_success;
}
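// simple_net() assembles the whole pipeline: engine creation, user and
// primitive memory setup, the conv -> relu -> lrn -> pool primitives plus
// any required reorders, execution on a stream, and clean-up.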
dnnl_status_t simple_net(dnnl_engine_kind_t engine_kind) {
dnnl_engine_t engine;
CHECK(dnnl_engine_create(&engine, engine_kind, 0));
// build a simple net
uint32_t n = 0;
dnnl_primitive_t net[10];
args_t net_args[10];
float *net_src
= (float *)malloc(BATCH * IC * CONV_IH * CONV_IW * sizeof(float));
float *net_dst
= (float *)malloc(BATCH * OC * POOL_OH * POOL_OW * sizeof(float));
CHECK_NULL(net_src);
CHECK_NULL(net_dst);
// AlexNet: conv
// {BATCH, IC, CONV_IH, CONV_IW} (x) {OC, IC, CONV_KH, CONV_KW} ->
// {BATCH, OC, CONV_OH, CONV_OW}
// strides: {CONV_STRIDE, CONV_STRIDE}
dnnl_dim_t conv_user_src_sizes[4] = {BATCH, IC, CONV_IH, CONV_IW};
dnnl_dim_t conv_user_weights_sizes[4] = {OC, IC, 11, 11};
dnnl_dim_t conv_bias_sizes[4] = {OC};
dnnl_dim_t conv_user_dst_sizes[4] = {BATCH, OC, CONV_OH, CONV_OW};
dnnl_dim_t conv_strides[2] = {CONV_STRIDE, CONV_STRIDE};
dnnl_dim_t conv_padding[2] = {CONV_PAD, CONV_PAD};
float *conv_src = net_src;
float *conv_weights = (float *)malloc(
product(conv_user_weights_sizes, 4) * sizeof(float));
float *conv_bias
= (float *)malloc(product(conv_bias_sizes, 1) * sizeof(float));
CHECK_NULL(conv_src);
CHECK_NULL(conv_weights);
CHECK_NULL(conv_bias);
// create memory for user data
dnnl_memory_t conv_user_src_memory, conv_user_weights_memory,
conv_user_bias_memory;
init_data_memory(4, conv_user_src_sizes, dnnl_nchw, dnnl_f32, engine,
conv_src, &conv_user_src_memory);
init_data_memory(4, conv_user_weights_sizes, dnnl_oihw, dnnl_f32, engine,
conv_weights, &conv_user_weights_memory);
init_data_memory(1, conv_bias_sizes, dnnl_x, dnnl_f32, engine, conv_bias,
&conv_user_bias_memory);
// create data descriptors for convolution w/ no specified format
dnnl_memory_desc_t conv_src_md, conv_weights_md, conv_bias_md, conv_dst_md;
CHECK(dnnl_memory_desc_init_by_tag(&conv_src_md, 4, conv_user_src_sizes,
dnnl_f32, dnnl_format_tag_any));
CHECK(dnnl_memory_desc_init_by_tag(&conv_weights_md, 4,
conv_user_weights_sizes, dnnl_f32, dnnl_format_tag_any));
CHECK(dnnl_memory_desc_init_by_tag(
&conv_bias_md, 1, conv_bias_sizes, dnnl_f32, dnnl_x));
CHECK(dnnl_memory_desc_init_by_tag(&conv_dst_md, 4, conv_user_dst_sizes,
dnnl_f32, dnnl_format_tag_any));
// create a convolution
dnnl_convolution_desc_t conv_any_desc;
CHECK(dnnl_convolution_forward_desc_init(&conv_any_desc, dnnl_forward,
dnnl_convolution_direct, &conv_src_md, &conv_weights_md,
&conv_bias_md, &conv_dst_md, conv_strides, conv_padding,
conv_padding));
dnnl_primitive_desc_t conv_pd;
CHECK(dnnl_primitive_desc_create(
&conv_pd, &conv_any_desc, NULL, engine, NULL));
dnnl_memory_t conv_internal_src_memory, conv_internal_weights_memory,
conv_internal_dst_memory;
// create memory for dst data; we don't need to reorder it to user data
const dnnl_memory_desc_t *dst_md
= dnnl_primitive_desc_query_md(conv_pd, dnnl_query_dst_md, 0);
CHECK(dnnl_memory_create(
&conv_internal_dst_memory, dst_md, engine, DNNL_MEMORY_ALLOCATE));
// create reorder primitives between user data and convolution srcs
// if required
dnnl_primitive_t conv_reorder_src, conv_reorder_weights;
const dnnl_memory_desc_t *src_md
= dnnl_primitive_desc_query_md(conv_pd, dnnl_query_src_md, 0);
CHECK(prepare_reorder(&conv_user_src_memory, src_md, engine, 1,
&conv_internal_src_memory, &conv_reorder_src, &n, net, net_args));
const dnnl_memory_desc_t *weights_md
= dnnl_primitive_desc_query_md(conv_pd, dnnl_query_weights_md, 0);
CHECK(prepare_reorder(&conv_user_weights_memory, weights_md, engine, 1,
&conv_internal_weights_memory, &conv_reorder_weights, &n, net,
net_args));
dnnl_memory_t conv_src_memory = conv_internal_src_memory
? conv_internal_src_memory
: conv_user_src_memory;
dnnl_memory_t conv_weights_memory = conv_internal_weights_memory
? conv_internal_weights_memory
: conv_user_weights_memory;
// finally create a convolution primitive
dnnl_primitive_t conv;
CHECK(dnnl_primitive_create(&conv, conv_pd));
net[n] = conv;
prepare_arg_node(&net_args[n], 4);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, conv_src_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_WEIGHTS, conv_weights_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_BIAS, conv_user_bias_memory);
set_arg(&net_args[n].args[3], DNNL_ARG_DST, conv_internal_dst_memory);
n++;
// AlexNet: relu
// {BATCH, OC, CONV_OH, CONV_OW} -> {BATCH, OC, CONV_OH, CONV_OW}
float negative_slope = 1.0f;
// create relu memory descriptor on dst memory descriptor
// from previous primitive
const dnnl_memory_desc_t *relu_src_md
= dnnl_primitive_desc_query_md(conv_pd, dnnl_query_dst_md, 0);
// create a relu
dnnl_eltwise_desc_t relu_desc;
CHECK(dnnl_eltwise_forward_desc_init(&relu_desc, dnnl_forward,
dnnl_eltwise_relu, relu_src_md, negative_slope, 0));
dnnl_primitive_desc_t relu_pd;
CHECK(dnnl_primitive_desc_create(&relu_pd, &relu_desc, NULL, engine, NULL));
dnnl_memory_t relu_dst_memory;
const dnnl_memory_desc_t *relu_dst_md
= dnnl_primitive_desc_query_md(relu_pd, dnnl_query_dst_md, 0);
CHECK(dnnl_memory_create(
&relu_dst_memory, relu_dst_md, engine, DNNL_MEMORY_ALLOCATE));
// finally create a relu primitive
dnnl_primitive_t relu;
CHECK(dnnl_primitive_create(&relu, relu_pd));
net[n] = relu;
prepare_arg_node(&net_args[n], 2);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, conv_internal_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, relu_dst_memory);
n++;
// AlexNet: lrn
// {BATCH, OC, CONV_OH, CONV_OW} -> {BATCH, OC, CONV_OH, CONV_OW}
// local size: 5
// alpha: 0.0001
// beta: 0.75
uint32_t local_size = 5;
float alpha = 0.0001f;
float beta = 0.75f;
float k = 1.0f;
// create lrn memory descriptor on dst memory descriptor
// from previous primitive
const dnnl_memory_desc_t *lrn_src_md = relu_dst_md;
// create a lrn
dnnl_lrn_desc_t lrn_desc;
CHECK(dnnl_lrn_forward_desc_init(&lrn_desc, dnnl_forward,
dnnl_lrn_across_channels, lrn_src_md, local_size, alpha, beta, k));
dnnl_primitive_desc_t lrn_pd;
CHECK(dnnl_primitive_desc_create(&lrn_pd, &lrn_desc, NULL, engine, NULL));
dnnl_memory_t lrn_dst_memory;
const dnnl_memory_desc_t *lrn_dst_md
= dnnl_primitive_desc_query_md(lrn_pd, dnnl_query_dst_md, 0);
CHECK(dnnl_memory_create(
&lrn_dst_memory, lrn_dst_md, engine, DNNL_MEMORY_ALLOCATE));
dnnl_memory_t lrn_ws_memory;
const dnnl_memory_desc_t *lrn_ws_md
= dnnl_primitive_desc_query_md(lrn_pd, dnnl_query_workspace_md, 0);
CHECK(dnnl_memory_create(
&lrn_ws_memory, lrn_ws_md, engine, DNNL_MEMORY_ALLOCATE));
// finally create a lrn primitive
dnnl_primitive_t lrn;
CHECK(dnnl_primitive_create(&lrn, lrn_pd));
net[n] = lrn;
prepare_arg_node(&net_args[n], 3);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, relu_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, lrn_dst_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_WORKSPACE, lrn_ws_memory);
n++;
// AlexNet: pool
// {BATCH, OC, CONV_OH, CONV_OW} -> {BATCH, OC, POOL_OH, POOL_OW}
// kernel: {3, 3}
// strides: {POOL_STRIDE, POOL_STRIDE}
dnnl_dim_t pool_dst_sizes[4] = {BATCH, OC, POOL_OH, POOL_OW};
dnnl_dim_t pool_kernel[2] = {3, 3};
dnnl_dim_t pool_strides[2] = {POOL_STRIDE, POOL_STRIDE};
dnnl_dim_t pool_padding[2] = {POOL_PAD, POOL_PAD};
// create pooling memory descriptor on dst descriptor
// from previous primitive
const dnnl_memory_desc_t *pool_src_md = lrn_dst_md;
// create descriptors for dst pooling data
dnnl_memory_desc_t pool_dst_any_md;
CHECK(dnnl_memory_desc_init_by_tag(&pool_dst_any_md, 4, pool_dst_sizes,
dnnl_f32, dnnl_format_tag_any));
// create memory for user data
dnnl_memory_t pool_user_dst_memory;
init_data_memory(4, pool_dst_sizes, dnnl_nchw, dnnl_f32, engine, net_dst,
&pool_user_dst_memory);
// create a pooling
dnnl_pooling_desc_t pool_desc;
CHECK(dnnl_pooling_forward_desc_init(&pool_desc, dnnl_forward,
dnnl_pooling_max, pool_src_md, &pool_dst_any_md, pool_strides,
pool_kernel, pool_padding, pool_padding));
dnnl_primitive_desc_t pool_pd;
CHECK(dnnl_primitive_desc_create(&pool_pd, &pool_desc, NULL, engine, NULL));
// create memory for workspace
dnnl_memory_t pool_ws_memory;
const dnnl_memory_desc_t *pool_ws_md = dnnl_primitive_desc_query_md(
pool_pd, dnnl_query_workspace_md, 0);
CHECK(dnnl_memory_create(
&pool_ws_memory, pool_ws_md, engine, DNNL_MEMORY_ALLOCATE));
dnnl_memory_t pool_dst_memory;
// create reorder primitives between user data and pooling dsts
// if required
dnnl_primitive_t pool_reorder_dst;
dnnl_memory_t pool_internal_dst_memory;
const dnnl_memory_desc_t *pool_dst_md
= dnnl_primitive_desc_query_md(pool_pd, dnnl_query_dst_md, 0);
n += 1; // tentative workaround: preserve space for pooling that should
// happen before the reorder
CHECK(prepare_reorder(&pool_user_dst_memory, pool_dst_md, engine, 0,
&pool_internal_dst_memory, &pool_reorder_dst, &n, net, net_args));
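// If a reorder was created it now sits one slot past the space reserved for
// pooling above, and n points past it. Rewind n to the reserved slot so the
// pooling primitive is recorded before the reorder; the n++ and the
// "if (pool_reorder_dst) n += 1;" below then advance past both again.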
n -= pool_reorder_dst ? 2 : 1;
pool_dst_memory = pool_internal_dst_memory ? pool_internal_dst_memory
: pool_user_dst_memory;
// finally create a pooling primitive
dnnl_primitive_t pool;
CHECK(dnnl_primitive_create(&pool, pool_pd));
net[n] = pool;
prepare_arg_node(&net_args[n], 3);
set_arg(&net_args[n].args[0], DNNL_ARG_SRC, lrn_dst_memory);
set_arg(&net_args[n].args[1], DNNL_ARG_DST, pool_dst_memory);
set_arg(&net_args[n].args[2], DNNL_ARG_WORKSPACE, pool_ws_memory);
n++;
if (pool_reorder_dst) n += 1;
dnnl_stream_t stream;
CHECK(dnnl_stream_create(&stream, engine, dnnl_stream_default_flags));
for (uint32_t i = 0; i < n; ++i) {
CHECK(dnnl_primitive_execute(
net[i], stream, net_args[i].nargs, net_args[i].args));
}
CHECK(dnnl_stream_wait(stream));
// clean-up
for (uint32_t i = 0; i < n; ++i)
free_arg_node(&net_args[i]);
CHECK(dnnl_primitive_desc_destroy(conv_pd));
CHECK(dnnl_primitive_desc_destroy(relu_pd));
CHECK(dnnl_primitive_desc_destroy(lrn_pd));
CHECK(dnnl_primitive_desc_destroy(pool_pd));
dnnl_stream_destroy(stream);
free(net_src);
free(net_dst);
dnnl_memory_destroy(conv_user_src_memory);
dnnl_memory_destroy(conv_user_weights_memory);
dnnl_memory_destroy(conv_user_bias_memory);
dnnl_memory_destroy(conv_internal_src_memory);
dnnl_memory_destroy(conv_internal_weights_memory);
dnnl_memory_destroy(conv_internal_dst_memory);
dnnl_primitive_destroy(conv_reorder_src);
dnnl_primitive_destroy(conv_reorder_weights);
dnnl_primitive_destroy(conv);
free(conv_weights);
free(conv_bias);
dnnl_memory_destroy(relu_dst_memory);
dnnl_primitive_destroy(relu);
dnnl_memory_destroy(lrn_ws_memory);
dnnl_memory_destroy(lrn_dst_memory);
dnnl_primitive_destroy(lrn);
dnnl_memory_destroy(pool_user_dst_memory);
dnnl_memory_destroy(pool_internal_dst_memory);
dnnl_memory_destroy(pool_ws_memory);
dnnl_primitive_destroy(pool_reorder_dst);
dnnl_primitive_destroy(pool);
dnnl_engine_destroy(engine);
return dnnl_success;
}
int main(int argc, char **argv) {
dnnl_status_t result = simple_net(parse_engine_kind(argc, argv));
printf("%s\n",
(result == dnnl_success) ? "Simple net f32 inference passed!"
: "Simple net f32 inference failed!");
return result;
}
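To try the example (a sketch only; the project's official flow builds the examples with CMake): with DNNL installed, compile the file against dnnl.h and link with -ldnnl, keeping example_utils.h and example_macros.h on the include path, e.g. `gcc cnn_inference_f32.c -I<dnnl>/include -I<dnnl>/examples -L<dnnl>/lib -ldnnl` with the `<dnnl>` paths as placeholders for your installation. The binary takes an optional engine argument parsed by parse_engine_kind (typically `cpu` or `gpu`) and prints whether the f32 inference pass succeeded.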