230 lines
7.3 KiB
C++
Executable File
230 lines
7.3 KiB
C++
Executable File
#pragma once
|
|
#include <vector>
|
|
#include <map>
|
|
#include <string>
|
|
#include "common.hpp"
|
|
|
|
/* When a bottleneck block has stride > 1, selects which convolution carries the stride:
 *   true  -> the first 1x1 convolution,
 *   false -> the 3x3 convolution.
 * Set to false when using a backbone from torchvision. */
#define STRIDE_IN_1X1 true
|
|
|
|
// Supported ResNet variants.
// In this file R18 and R34 are built from BasicBlock; every other value is built
// from BottleneckBlock. Enumerators are numbered consecutively starting at 0.
enum RESNETTYPE {
    R18 = 0,
    R34,
    R50,
    R101,
    R152
};
|
|
|
|
const std::map<RESNETTYPE, std::vector<int>> num_blocks_per_stage = {
|
|
{R18, {2, 2, 2, 2}},
|
|
{R34, {3, 4, 6, 3}},
|
|
{R50, {3, 4, 6, 3}},
|
|
{R101, {3, 4, 23, 3}},
|
|
{R152, {3, 8, 36, 3}}
|
|
};
|
|
|
|
/**
 * Adds the ResNet stem to the network: 7x7 stride-2 convolution (padding 3),
 * ReLU, then 3x3 stride-2 max pooling (padding 1).
 *
 * Declared inline because it is defined in a header; without it, including this
 * header from more than one translation unit produces duplicate definitions.
 *
 * @param network      Network definition the layers are added to.
 * @param weightMap    Weights by name. Reads lname + ".conv1.weight" and
 *                     lname + ".conv1.bias" via operator[], so a missing key is
 *                     default-inserted rather than reported.
 * @param lname        Name prefix of the stem weights.
 * @param input        Tensor fed to the convolution.
 * @param out_channels Number of output maps of the convolution.
 * @param group_num    Group count set on the convolution.
 * @return The max-pooling layer; its output 0 is the stem output.
 */
inline ILayer* BasicStem(INetworkDefinition *network,
                         std::map<std::string, Weights>& weightMap,
                         const std::string& lname, ITensor& input,
                         int out_channels,
                         int group_num = 1) {
    // conv1
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 7, 7 },
        weightMap[lname + ".conv1.weight"],
        weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ 2, 2 });
    conv1->setPaddingNd(DimsHW{ 3, 3 });
    conv1->setNbGroups(group_num);

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    auto max_pool2d = network->addPoolingNd(*r1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
    // Checked like every other layer here before it is dereferenced.
    assert(max_pool2d);
    max_pool2d->setStrideNd(DimsHW{ 2, 2 });
    max_pool2d->setPaddingNd(DimsHW{ 1, 1 });
    return max_pool2d;
}
|
|
|
|
/**
 * Adds a basic residual block: 3x3 conv (stride `stride`) -> ReLU -> 3x3 conv
 * (stride 1), summed with a shortcut and followed by ReLU.
 *
 * The shortcut is a 1x1 convolution with stride `stride` when
 * in_channels != out_channels; otherwise `input` itself is used.
 * NOTE(review): with in_channels == out_channels and stride != 1 the identity
 * shortcut is summed with a strided main path - presumably callers never pass
 * that combination; confirm.
 *
 * Declared inline because it is defined in a header (avoids duplicate
 * definitions when the header is included from several translation units).
 *
 * @param network      Network definition the layers are added to.
 * @param weightMap    Weights by name. Reads lname + ".conv1.*", ".conv2.*" and,
 *                     when a projection is needed, ".shortcut.*" ("weight"/"bias")
 *                     via operator[]; missing keys are default-inserted.
 * @param lname        Name prefix of this block's weights.
 * @param input        Block input tensor.
 * @param in_channels  Channel count of `input`; only compared with out_channels.
 * @param out_channels Number of output maps of both convolutions and the shortcut.
 * @param stride       Stride of the first convolution and of the projection shortcut.
 * @return Output tensor of the final ReLU.
 */
inline ITensor* BasicBlock(INetworkDefinition *network,
                           std::map<std::string, Weights>& weightMap,
                           const std::string& lname,
                           ITensor& input,
                           int in_channels,
                           int out_channels,
                           int stride = 1) {
    // conv1
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv1.weight"],
        weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ stride, stride });
    conv1->setPaddingNd(DimsHW{ 1, 1 });

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    // conv2
    IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), out_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv2.weight"],
        weightMap[lname + ".conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{ 1, 1 });
    conv2->setPaddingNd(DimsHW{ 1, 1 });

    // shortcut
    ITensor* shortcut_value = nullptr;
    if (in_channels != out_channels) {
        auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
            weightMap[lname + ".shortcut.weight"],
            weightMap[lname + ".shortcut.bias"]);
        assert(shortcut);
        shortcut->setStrideNd(DimsHW{ stride, stride });
        shortcut_value = shortcut->getOutput(0);
    } else {
        shortcut_value = &input;
    }

    // add
    auto ew = network->addElementWise(*conv2->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
    assert(ew);

    auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
    assert(r3);

    return r3->getOutput(0);
}
|
|
|
|
/**
 * Adds a bottleneck residual block: 1x1 conv -> ReLU -> 3x3 conv -> ReLU ->
 * 1x1 conv, summed with a shortcut and followed by ReLU.
 *
 * STRIDE_IN_1X1 decides whether `stride` is applied by the first 1x1 convolution
 * or by the 3x3 convolution; the other one uses stride 1. The 3x3 convolution
 * uses `dilation` for both its dilation and its padding.
 * The shortcut is a 1x1 convolution with stride `stride` when
 * in_channels != out_channels; otherwise `input` itself is used.
 * NOTE(review): group_num is set on all four convolutions (conv1, conv2, conv3
 * and the shortcut), not only on the 3x3 one - confirm this is intended before
 * using group_num != 1.
 *
 * Declared inline because it is defined in a header (avoids duplicate
 * definitions when the header is included from several translation units).
 *
 * @param network             Network definition the layers are added to.
 * @param weightMap           Weights by name. Reads lname + ".conv1.*", ".conv2.*",
 *                            ".conv3.*" and, when a projection is needed,
 *                            ".shortcut.*" ("weight"/"bias") via operator[];
 *                            missing keys are default-inserted.
 * @param lname               Name prefix of this block's weights.
 * @param input               Block input tensor.
 * @param in_channels         Channel count of `input`; only compared with out_channels.
 * @param bottleneck_channels Number of output maps of conv1 and conv2.
 * @param out_channels        Number of output maps of conv3 and the shortcut.
 * @param stride              Block stride (see STRIDE_IN_1X1).
 * @param dilation            Dilation and padding of the 3x3 convolution.
 * @param group_num           Group count set on every convolution of the block.
 * @return Output tensor of the final ReLU.
 */
inline ITensor* BottleneckBlock(INetworkDefinition *network,
                                std::map<std::string, Weights>& weightMap,
                                const std::string& lname,
                                ITensor& input,
                                int in_channels,
                                int bottleneck_channels,
                                int out_channels,
                                int stride = 1,
                                int dilation = 1,
                                int group_num = 1) {
    int stride_1x1 = STRIDE_IN_1X1 ? stride : 1;
    int stride_3x3 = STRIDE_IN_1X1 ? 1 : stride;

    // conv1
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, bottleneck_channels, DimsHW{ 1, 1 },
        weightMap[lname + ".conv1.weight"],
        weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ stride_1x1, stride_1x1 });
    conv1->setNbGroups(group_num);

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    // conv2
    IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), bottleneck_channels, DimsHW{ 3, 3 },
        weightMap[lname + ".conv2.weight"],
        weightMap[lname + ".conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{ stride_3x3, stride_3x3 });
    conv2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
    conv2->setDilationNd(DimsHW{ dilation, dilation });
    conv2->setNbGroups(group_num);

    auto r2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(r2);

    // conv3
    IConvolutionLayer* conv3 = network->addConvolutionNd(*r2->getOutput(0), out_channels, DimsHW{ 1, 1 },
        weightMap[lname + ".conv3.weight"],
        weightMap[lname + ".conv3.bias"]);
    assert(conv3);
    conv3->setStrideNd(DimsHW{ 1, 1 });
    conv3->setNbGroups(group_num);

    // shortcut
    ITensor* shortcut_value = nullptr;
    if (in_channels != out_channels) {
        auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
            weightMap[lname + ".shortcut.weight"],
            weightMap[lname + ".shortcut.bias"]);
        assert(shortcut);
        shortcut->setStrideNd(DimsHW{ stride, stride });
        shortcut->setNbGroups(group_num);
        shortcut_value = shortcut->getOutput(0);
    } else {
        shortcut_value = &input;
    }

    // add
    auto ew = network->addElementWise(*conv3->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
    assert(ew);

    auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
    assert(r3);

    return r3->getOutput(0);
}
|
|
|
|
/**
 * Chains `stage` residual blocks, feeding each block's output into the next.
 *
 * Block i uses the weight prefix lname + "." + i. Only the first block uses
 * first_stride; the rest use stride 1. After the first block the input channel
 * count passed to later blocks equals out_channels.
 * R18/R34 are built from BasicBlock (bottleneck_channels and dilation are
 * ignored); every other type is built from BottleneckBlock.
 *
 * Declared inline because it is defined in a header (avoids duplicate
 * definitions when the header is included from several translation units).
 *
 * @param network             Network definition the layers are added to.
 * @param weightMap           Weights by name, forwarded to the block builders.
 * @param lname               Name prefix of the stage.
 * @param input               Stage input tensor.
 * @param stage               Number of blocks to build (despite the name, a count,
 *                            not a stage index). With 0, &input is returned.
 * @param resnet_type         Selects the block builder.
 * @param in_channels         Channel count entering the first block.
 * @param bottleneck_channels Inner channel count for bottleneck blocks.
 * @param out_channels        Channel count produced by every block.
 * @param first_stride        Stride of the first block.
 * @param dilation            Dilation forwarded to bottleneck blocks.
 * @return Output tensor of the last block.
 */
inline ITensor* MakeStage(INetworkDefinition *network,
                          std::map<std::string, Weights>& weightMap,
                          const std::string& lname,
                          ITensor& input,
                          int stage,
                          RESNETTYPE resnet_type,
                          int in_channels,
                          int bottleneck_channels,
                          int out_channels,
                          int first_stride = 1,
                          int dilation = 1) {
    ITensor* out = &input;
    for (int i = 0; i < stage; i++) {
        std::string layerName = lname + "." + std::to_string(i);
        int stride = i == 0 ? first_stride : 1;

        if (resnet_type == R18 || resnet_type == R34) {
            out = BasicBlock(network, weightMap, layerName, *out, in_channels, out_channels, stride);
        } else {
            out = BottleneckBlock(network, weightMap, layerName, *out,
                                  in_channels, bottleneck_channels, out_channels, stride, dilation);
        }

        // Every block after the first sees the stage's output width as its input.
        in_channels = out_channels;
    }
    return out;
}
|
|
|
|
/**
 * Builds the ResNet backbone: the stem ("backbone.stem") followed by three
 * stages named "backbone.res2", "backbone.res3" and "backbone.res4".
 *
 * The first stage uses stride 1 and the following ones stride 2. After each
 * stage the bottleneck and output channel counts are doubled, and the next
 * stage's input channel count is the previous stage's output channel count.
 *
 * NOTE(review): the stage loop runs for i = 0..2, so the `i == 3` cases below
 * are never reached: no "backbone.res5" stage is built here and res5_dilation
 * is validated but otherwise unused. Behavior is kept as-is; confirm with the
 * callers whether res5 is built elsewhere before changing the loop bound.
 *
 * Declared inline because it is defined in a header (avoids duplicate
 * definitions when the header is included from several translation units).
 *
 * @param network             Network definition the layers are added to.
 * @param weightMap           Weights by name, forwarded to the builders.
 * @param input               Network input tensor.
 * @param resnet_type         ResNet variant; selects block type and block counts.
 * @param stem_out_channels   Output channel count of the stem.
 * @param bottleneck_channels Inner channel count of the first stage's bottleneck blocks.
 * @param res2_out_channels   Output channel count of the first stage; asserted to be
 *                            64 for R18/R34.
 * @param res5_dilation       Asserted to be 1 or 2 (and 1 for R18/R34).
 * @return Output tensor of the last stage built.
 */
inline ITensor* BuildResNet(INetworkDefinition *network,
                            std::map<std::string, Weights>& weightMap,
                            ITensor& input,
                            RESNETTYPE resnet_type,
                            int stem_out_channels,
                            int bottleneck_channels,
                            int res2_out_channels,
                            int res5_dilation = 1) {
    assert(res5_dilation == 1 || res5_dilation == 2);  // "res5_dilation must be 1 or 2"
    if (resnet_type == R18 || resnet_type == R34) {
        assert(res2_out_channels == 64);  // "res2_out_channels must be 64 for R18/R34"
        assert(res5_dilation == 1);  // "res5_dilation must be 1 for R18/R34"
    }

    // stem
    auto stem = BasicStem(network, weightMap, "backbone.stem", input, stem_out_channels);
    assert(stem);
    ITensor* out = stem->getOutput(0);

    // Looked up once; throws std::out_of_range for an unknown resnet_type.
    const std::vector<int>& blocks_per_stage = num_blocks_per_stage.at(resnet_type);

    // res
    int in_channels = stem_out_channels;
    int out_channels = res2_out_channels;
    for (int i = 0; i < 3; i++) {
        int dilation = (i == 3) ? res5_dilation : 1;
        int first_stride = (i == 0 || (i == 3 && dilation == 2)) ? 1 : 2;
        out = MakeStage(network, weightMap,
                        "backbone.res" + std::to_string(i + 2), *out,
                        blocks_per_stage[i], resnet_type,
                        in_channels, bottleneck_channels, out_channels,
                        first_stride, dilation);
        in_channels = out_channels;
        bottleneck_channels *= 2;
        out_channels *= 2;
    }
    return out;
}
|