327 lines
9.5 KiB
C++
327 lines
9.5 KiB
C++
#pragma once
|
|
#include <map>
|
|
#include "common.hpp"
|
|
|
|
// Identifiers for the ResNet variants this header can build.
// Used as the key of num_blocks_per_stage; MakeStage builds R18/R34 from
// BasicBlock and every other variant from BottleneckBlock.
enum RESNETTYPE {
    R18 = 0,
    R34 = 1,
    R50 = 2,
    R101 = 3,
    R152 = 4
};
|
|
|
|
const std::map<RESNETTYPE, std::vector<int>> num_blocks_per_stage = {
|
|
{R18, {2, 2, 2, 2}},
|
|
{R34, {3, 4, 6, 3}},
|
|
{R50, {3, 4, 6, 3}},
|
|
{R101, {3, 4, 23, 3}},
|
|
{R152, {3, 8, 36, 3}}
|
|
};
|
|
|
|
// Adds an inference-time batch normalisation as a per-channel scale layer.
//
// Reads `<lname>.weight` (gamma), `<lname>.bias` (beta), `<lname>.running_mean`
// and `<lname>.running_var` from weightMap and folds them into
//     scale = gamma / sqrt(var + eps)
//     shift = beta - mean * gamma / sqrt(var + eps)
//     power = 1
// which are handed to INetworkDefinition::addScale with ScaleMode::kCHANNEL.
//
// The folded buffers are malloc'ed here and stored in weightMap under
// `<lname>.scale`, `<lname>.shift` and `<lname>.power`. This function never
// frees them; presumably whoever releases the buffers held by weightMap does
// (confirm against the caller).
//
// The tensors are read as 32-bit floats without checking Weights::type.
//
// Parameters:
//   network   - network the scale layer is added to
//   weightMap - source of the batch-norm tensors; receives the folded entries
//   input     - tensor to normalise
//   lname     - key prefix of the batch-norm tensors in weightMap
//   eps       - value added to the variance before taking the square root
//
// Returns the new scale layer. Returns nullptr (after tripping an assert in
// debug builds) when a tensor is missing, the tensors are empty or differ in
// length, an allocation fails, or addScale itself fails.
IScaleLayer* addBatchNorm2d(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    ITensor& input,
    const std::string& lname,
    float eps = 1e-5
) {
    // Use find() rather than operator[]: operator[] would silently insert an
    // empty Weights for a missing key and the layer would degrade to identity.
    const auto lookup = [&weightMap](const std::string& key) -> const Weights* {
        const auto it = weightMap.find(key);
        return it == weightMap.end() ? nullptr : &it->second;
    };
    const Weights* gammaW = lookup(lname + ".weight");
    const Weights* betaW = lookup(lname + ".bias");
    const Weights* meanW = lookup(lname + ".running_mean");
    const Weights* varW = lookup(lname + ".running_var");

    const bool present = gammaW && betaW && meanW && varW;
    assert(present && "addBatchNorm2d: batch-norm tensor missing from weightMap");
    if (!present) {
        return nullptr;
    }

    // All four tensors are indexed with the same channel index below, so they
    // must be non-empty and of equal length.
    const int len = static_cast<int>(varW->count);
    const bool consistent = len > 0
        && gammaW->values && betaW->values && meanW->values && varW->values
        && gammaW->count == varW->count
        && betaW->count == varW->count
        && meanW->count == varW->count;
    assert(consistent && "addBatchNorm2d: batch-norm tensors are empty or differ in length");
    if (!consistent) {
        return nullptr;
    }

    const float* gamma = static_cast<const float*>(gammaW->values);
    const float* beta = static_cast<const float*>(betaW->values);
    const float* mean = static_cast<const float*>(meanW->values);
    const float* var = static_cast<const float*>(varW->values);

    float* scval = static_cast<float*>(malloc(sizeof(float) * len));
    float* shval = static_cast<float*>(malloc(sizeof(float) * len));
    float* pval = static_cast<float*>(malloc(sizeof(float) * len));
    const bool allocated = scval && shval && pval;
    assert(allocated && "addBatchNorm2d: out of memory");
    if (!allocated) {
        free(scval);
        free(shval);
        free(pval);
        return nullptr;
    }

    for (int i = 0; i < len; i++) {
        // Shared denominator of scale and shift; computed once per channel.
        const auto denom = sqrt(var[i] + eps);
        scval[i] = gamma[i] / denom;
        shval[i] = beta[i] - mean[i] * gamma[i] / denom;
        pval[i] = 1.0f;
    }

    Weights scale{ DataType::kFLOAT, scval, len };
    Weights shift{ DataType::kFLOAT, shval, len };
    Weights power{ DataType::kFLOAT, pval, len };

    // Register the buffers in the map so they stay reachable after this call.
    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(scale_1);
    return scale_1;
}
|
|
|
|
// Builds the stem of the backbone:
//   7x7 convolution (stride 2, padding 3, no bias)
//   -> batch norm -> ReLU
//   -> 3x3 max pooling (stride 2, padding 1).
//
// Parameters:
//   network      - network the layers are added to
//   weightMap    - provides `<lname>.conv1.weight` and the `<lname>.bn1.*`
//                  tensors consumed by addBatchNorm2d
//   lname        - key prefix of the stem's weights
//   input        - tensor fed to the convolution
//   out_channels - number of output maps of the convolution
//   group_num    - number of groups of the convolution
//
// Returns the max-pooling layer; its output 0 is the stem output.
ILayer* BasicStem(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    const std::string& lname,
    ITensor& input,
    int out_channels,
    int group_num = 1
) {
    // conv1 (no bias: an empty Weights is passed as the bias tensor)
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
    IConvolutionLayer* conv1 = network->addConvolutionNd(
        input,
        out_channels,
        DimsHW{ 7, 7 },
        weightMap[lname + ".conv1.weight"],
        emptywts);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ 2, 2 });
    conv1->setPaddingNd(DimsHW{ 3, 3 });
    conv1->setNbGroups(group_num);

    auto bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn1");
    assert(bn1);

    auto r1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    auto max_pool2d = network->addPoolingNd(*r1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
    // Check the layer before configuring it, like every other layer here.
    assert(max_pool2d);
    max_pool2d->setStrideNd(DimsHW{ 2, 2 });
    max_pool2d->setPaddingNd(DimsHW{ 1, 1 });
    return max_pool2d;
}
|
|
|
|
// Residual block made of two 3x3 convolutions (both with bias, no batch norm):
//   conv1 (stride `stride`, padding 1) -> ReLU -> conv2 (stride 1, padding 1),
//   added element-wise to the shortcut, followed by a final ReLU.
// The shortcut is the input itself when in_channels == out_channels, otherwise
// a 1x1 convolution with stride `stride`.
//
// Weight keys read from weightMap: `<lname>.conv1.{weight,bias}`,
// `<lname>.conv2.{weight,bias}` and, for the projected shortcut,
// `<lname>.shortcut.{weight,bias}`.
//
// Returns the output tensor of the final ReLU.
ITensor* BasicBlock(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    const std::string& lname,
    ITensor& input,
    int in_channels,
    int out_channels,
    int stride = 1
) {
    // First 3x3 convolution; it carries the stride of the block.
    IConvolutionLayer* firstConv = network->addConvolutionNd(
        input, out_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv1.weight"],
        weightMap[lname + ".conv1.bias"]);
    assert(firstConv);
    firstConv->setStrideNd(DimsHW{ stride, stride });
    firstConv->setPaddingNd(DimsHW{ 1, 1 });

    auto firstRelu = network->addActivation(*firstConv->getOutput(0), ActivationType::kRELU);
    assert(firstRelu);

    // Second 3x3 convolution, always stride 1.
    IConvolutionLayer* secondConv = network->addConvolutionNd(
        *firstRelu->getOutput(0), out_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv2.weight"],
        weightMap[lname + ".conv2.bias"]);
    assert(secondConv);
    secondConv->setStrideNd(DimsHW{ 1, 1 });
    secondConv->setPaddingNd(DimsHW{ 1, 1 });

    // Identity shortcut unless the channel count changes.
    ITensor* residual = &input;
    if (in_channels != out_channels) {
        auto projection = network->addConvolutionNd(
            input, out_channels, DimsHW{ 1, 1 },
            weightMap[lname + ".shortcut.weight"],
            weightMap[lname + ".shortcut.bias"]);
        assert(projection);
        projection->setStrideNd(DimsHW{ stride, stride });
        residual = projection->getOutput(0);
    }

    // Sum of main path and shortcut, then the closing ReLU.
    auto sum = network->addElementWise(
        *secondConv->getOutput(0), *residual, ElementWiseOperation::kSUM);
    assert(sum);

    auto outRelu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
    assert(outRelu);

    return outRelu->getOutput(0);
}
|
|
|
|
// Bottleneck residual block. All convolutions are bias-free and each is
// followed by a batch norm built with addBatchNorm2d:
//   1x1 conv (stride 1)                               -> bn1 -> ReLU
//   3x3 conv (stride `stride`, padding and dilation `dilation`) -> bn2 -> ReLU
//   1x1 conv (stride 1)                               -> bn3
// bn3 is added element-wise to the shortcut and passed through a final ReLU.
// The shortcut is the input itself when in_channels == out_channels, otherwise
// a 1x1 convolution with stride `stride` followed by a batch norm.
//
// Weight keys read from weightMap: `<lname>.conv{1,2,3}.weight`,
// `<lname>.bn{1,2,3}.*`, and for the projected shortcut
// `<lname>.downsample.0.weight` / `<lname>.downsample.1.*`.
//
// NOTE(review): group_num is applied to the 1x1 convolutions and the shortcut
// as well as to the 3x3 convolution; confirm the checkpoint is grouped the
// same way before calling this with group_num != 1.
//
// Returns the output tensor of the final ReLU.
ITensor* BottleneckBlock(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    const std::string& lname,
    ITensor& input,
    int in_channels,
    int bottleneck_channels,
    int out_channels,
    int stride = 1,
    int dilation = 1,
    int group_num = 1
) {
    // Passed as the bias of every convolution in this block.
    Weights noBias{ DataType::kFLOAT, nullptr, 0 };

    // 1x1 reduction.
    IConvolutionLayer* reduceConv = network->addConvolutionNd(
        input, bottleneck_channels, DimsHW{ 1, 1 },
        weightMap[lname + ".conv1.weight"], noBias);
    assert(reduceConv);
    reduceConv->setStrideNd(DimsHW{ 1, 1 });
    reduceConv->setNbGroups(group_num);

    auto reduceBn = addBatchNorm2d(network, weightMap, *reduceConv->getOutput(0), lname + ".bn1");
    assert(reduceBn);

    auto reduceRelu = network->addActivation(*reduceBn->getOutput(0), ActivationType::kRELU);
    assert(reduceRelu);

    // 3x3 convolution; carries the stride and the dilation of the block.
    IConvolutionLayer* spatialConv = network->addConvolutionNd(
        *reduceRelu->getOutput(0), bottleneck_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv2.weight"], noBias);
    assert(spatialConv);
    spatialConv->setStrideNd(DimsHW{ stride, stride });
    spatialConv->setPaddingNd(DimsHW{ dilation, dilation });
    spatialConv->setDilationNd(DimsHW{ dilation, dilation });
    spatialConv->setNbGroups(group_num);

    auto spatialBn = addBatchNorm2d(network, weightMap, *spatialConv->getOutput(0), lname + ".bn2");
    assert(spatialBn);

    auto spatialRelu = network->addActivation(*spatialBn->getOutput(0), ActivationType::kRELU);
    assert(spatialRelu);

    // 1x1 expansion; no ReLU before the residual sum.
    IConvolutionLayer* expandConv = network->addConvolutionNd(
        *spatialRelu->getOutput(0), out_channels, DimsHW{ 1, 1 },
        weightMap[lname + ".conv3.weight"], noBias);
    assert(expandConv);
    expandConv->setStrideNd(DimsHW{ 1, 1 });
    expandConv->setNbGroups(group_num);

    auto expandBn = addBatchNorm2d(network, weightMap, *expandConv->getOutput(0), lname + ".bn3");
    assert(expandBn);

    // Identity shortcut unless the channel count changes.
    ITensor* residual = &input;
    if (in_channels != out_channels) {
        auto projection = network->addConvolutionNd(
            input, out_channels, DimsHW{ 1, 1 },
            weightMap[lname + ".downsample.0.weight"], noBias);
        assert(projection);
        projection->setStrideNd(DimsHW{ stride, stride });
        projection->setNbGroups(group_num);

        auto projectionBn = addBatchNorm2d(
            network, weightMap, *projection->getOutput(0), lname + ".downsample.1");
        assert(projectionBn);
        residual = projectionBn->getOutput(0);
    }

    // Sum of main path and shortcut, then the closing ReLU.
    auto sum = network->addElementWise(
        *expandBn->getOutput(0), *residual, ElementWiseOperation::kSUM);
    assert(sum);

    auto outRelu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
    assert(outRelu);

    return outRelu->getOutput(0);
}
|
|
|
|
// Chains `stage` residual blocks, feeding each block's output to the next.
//
// Block i reads its weights under `<lname>.<i>`. Only block 0 uses
// first_stride and in_channels; every later block uses stride 1 and takes
// out_channels as its input channel count. R18 and R34 are built from
// BasicBlock, every other variant from BottleneckBlock (which additionally
// receives bottleneck_channels and dilation).
//
// Parameters:
//   stage - number of blocks to build (not a stage index)
//
// Returns the output tensor of the last block, or &input when stage <= 0.
ITensor* MakeStage(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    const std::string& lname,
    ITensor& input,
    int stage,
    RESNETTYPE resnet_type,
    int in_channels,
    int bottleneck_channels,
    int out_channels,
    int first_stride = 1,
    int dilation = 1
) {
    const bool useBasicBlock = (resnet_type == R18 || resnet_type == R34);

    ITensor* current = &input;
    int blockInChannels = in_channels;
    for (int blockIdx = 0; blockIdx < stage; ++blockIdx) {
        const std::string blockName = lname + "." + std::to_string(blockIdx);
        const int blockStride = (blockIdx == 0) ? first_stride : 1;

        if (useBasicBlock) {
            current = BasicBlock(
                network, weightMap, blockName, *current,
                blockInChannels, out_channels, blockStride);
        } else {
            current = BottleneckBlock(
                network, weightMap, blockName, *current,
                blockInChannels, bottleneck_channels, out_channels,
                blockStride, dilation);
        }

        // Every block after the first consumes the stage's output width.
        blockInChannels = out_channels;
    }
    return current;
}
|
|
|
|
// Builds the whole backbone: the stem followed by four stages.
//
// Weights are read under the fixed prefix `backbone.0.body` (stem) and
// `backbone.0.body.layer1` .. `backbone.0.body.layer4` (stages). The number of
// blocks per stage comes from num_blocks_per_stage. After each stage the
// bottleneck and output channel counts are doubled, and the next stage takes
// the previous stage's output width as its input width.
//
// The first block of a stage uses stride 2, except in the first stage and in
// the last stage when res5_dilation == 2, where it uses stride 1. Only the
// last stage receives res5_dilation; the others use dilation 1.
//
// Preconditions (checked with assert):
//   - res5_dilation is 1 or 2
//   - for R18/R34: res2_out_channels == 64 and res5_dilation == 1
//
// Returns the output tensor of the last stage.
ITensor* BuildResNet(
    INetworkDefinition *network,
    std::unordered_map<std::string, Weights>& weightMap,
    ITensor& input,
    RESNETTYPE resnet_type,
    int stem_out_channels,
    int bottleneck_channels,
    int res2_out_channels,
    int res5_dilation = 1
) {
    // res5_dilation must be 1 or 2
    assert(res5_dilation == 1 || res5_dilation == 2);
    if (resnet_type == R18 || resnet_type == R34) {
        // res2_out_channels must be 64 for R18/R34
        assert(res2_out_channels == 64);
        // res5_dilation must be 1 for R18/R34
        assert(res5_dilation == 1);
    }

    // Stem.
    auto stem = BasicStem(network, weightMap, "backbone.0.body", input, stem_out_channels);
    ITensor* features = stem->getOutput(0);

    // Channel widths of the stage currently being built.
    int stageIn = stem_out_channels;
    int stageMid = bottleneck_channels;
    int stageOut = res2_out_channels;

    // Residual stages.
    for (int stageIdx = 0; stageIdx < 4; ++stageIdx) {
        const bool lastStage = (stageIdx == 3);
        const int dilation = lastStage ? res5_dilation : 1;

        int firstStride = 2;
        if (stageIdx == 0 || (lastStage && dilation == 2)) {
            firstStride = 1;
        }

        features = MakeStage(
            network,
            weightMap,
            "backbone.0.body.layer" + std::to_string(stageIdx + 1),
            *features,
            num_blocks_per_stage.at(resnet_type)[stageIdx],
            resnet_type,
            stageIn,
            stageMid,
            stageOut,
            firstStride,
            dilation);

        stageIn = stageOut;
        stageMid *= 2;
        stageOut *= 2;
    }
    return features;
}
|