yolo_standard_libray/tensorrtx-master/rcnn/backbone.hpp
2025-03-07 11:35:40 +08:00

230 lines
7.3 KiB
C++
Executable File

#pragma once
#include <vector>
#include <map>
#include <string>
#include "common.hpp"
/* When stride > 1, controls whether the stride goes in the first 1x1 convolution or in the
bottleneck's 3x3 convolution. Set to false when using a backbone exported from torchvision. */
#define STRIDE_IN_1X1 true
// Supported ResNet depths. R18/R34 are built from BasicBlocks; R50 and
// deeper use BottleneckBlocks. Enumerator values are made explicit so the
// ordering contract is visible at a glance.
enum RESNETTYPE {
    R18 = 0,
    R34 = 1,
    R50 = 2,
    R101 = 3,
    R152 = 4
};
// Number of residual blocks in each of the four stages (res2..res5) for
// every supported ResNet depth, matching the standard ResNet paper configs.
const std::map<RESNETTYPE, std::vector<int>> num_blocks_per_stage = {
{R18, {2, 2, 2, 2}},
{R34, {3, 4, 6, 3}},
{R50, {3, 4, 6, 3}},
{R101, {3, 4, 23, 3}},
{R152, {3, 8, 36, 3}}
};
/**
 * Builds the ResNet stem: 7x7 stride-2 convolution -> ReLU -> 3x3 stride-2
 * max pool, for an overall 4x spatial downsampling.
 *
 * @param network    network under construction
 * @param weightMap  weights keyed by name; reads lname + ".conv1.weight" / ".conv1.bias"
 * @param lname      weight-name prefix for this stem (e.g. "backbone.stem")
 * @param input      input image tensor
 * @param out_channels  output channel count of the 7x7 convolution
 * @param group_num  group count for the convolution (1 = ordinary conv)
 * @return the max-pool layer; callers read getOutput(0)
 */
ILayer* BasicStem(INetworkDefinition *network,
                  std::map<std::string, Weights>& weightMap,
                  const std::string& lname, ITensor& input,
                  int out_channels,
                  int group_num = 1) {
    // conv1: 7x7, stride 2, pad 3 — halves spatial resolution
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 7, 7 },
                                                         weightMap[lname + ".conv1.weight"],
                                                         weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ 2, 2 });
    conv1->setPaddingNd(DimsHW{ 3, 3 });
    conv1->setNbGroups(group_num);
    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);
    // 3x3 max pool, stride 2, pad 1 — halves resolution again
    auto max_pool2d = network->addPoolingNd(*r1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
    assert(max_pool2d);  // fix: was the only layer here not null-checked
    max_pool2d->setStrideNd(DimsHW{ 2, 2 });
    max_pool2d->setPaddingNd(DimsHW{ 1, 1 });
    return max_pool2d;
}
/**
 * Builds one ResNet BasicBlock (used by R18/R34):
 * 3x3(stride) -> ReLU -> 3x3, summed with a shortcut, then ReLU.
 * The shortcut is a strided 1x1 projection when the channel count changes,
 * otherwise the identity.
 *
 * @param network    network under construction
 * @param weightMap  weights keyed by name; reads lname + ".conv1/.conv2/.shortcut" entries
 * @param lname      weight-name prefix for this block
 * @param input      block input tensor
 * @param in_channels   channels of `input`
 * @param out_channels  channels produced by the block
 * @param stride     stride of the first 3x3 conv (and of the projection shortcut)
 * @return the tensor after the final ReLU
 */
ITensor* BasicBlock(INetworkDefinition *network,
                    std::map<std::string, Weights>& weightMap,
                    const std::string& lname,
                    ITensor& input,
                    int in_channels,
                    int out_channels,
                    int stride = 1) {
    // Shortcut branch first: project with a strided 1x1 conv only when the
    // channel count changes; otherwise pass the input through untouched.
    ITensor* residual = &input;
    if (in_channels != out_channels) {
        auto proj = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
                                              weightMap[lname + ".shortcut.weight"],
                                              weightMap[lname + ".shortcut.bias"]);
        assert(proj);
        proj->setStrideNd(DimsHW{ stride, stride });
        residual = proj->getOutput(0);
    }
    // Main branch: first 3x3 carries the stride.
    IConvolutionLayer* c1 = network->addConvolutionNd(input, out_channels, DimsHW{ 3, 3 },
                                                      weightMap[lname + ".conv1.weight"],
                                                      weightMap[lname + ".conv1.bias"]);
    assert(c1);
    c1->setStrideNd(DimsHW{ stride, stride });
    c1->setPaddingNd(DimsHW{ 1, 1 });
    auto relu1 = network->addActivation(*c1->getOutput(0), ActivationType::kRELU);
    assert(relu1);
    // Second 3x3, stride 1.
    IConvolutionLayer* c2 = network->addConvolutionNd(*relu1->getOutput(0), out_channels, DimsHW{ 3, 3 },
                                                      weightMap[lname + ".conv2.weight"],
                                                      weightMap[lname + ".conv2.bias"]);
    assert(c2);
    c2->setStrideNd(DimsHW{ 1, 1 });
    c2->setPaddingNd(DimsHW{ 1, 1 });
    // Residual add followed by the output ReLU.
    auto sum = network->addElementWise(*c2->getOutput(0), *residual, ElementWiseOperation::kSUM);
    assert(sum);
    auto out_relu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
    assert(out_relu);
    return out_relu->getOutput(0);
}
/**
 * Builds one ResNet BottleneckBlock (used by R50/R101/R152):
 * 1x1 -> ReLU -> 3x3 (dilated/grouped) -> ReLU -> 1x1, summed with a
 * shortcut, then ReLU. STRIDE_IN_1X1 decides whether the stride lives in
 * the first 1x1 conv (Caffe/MSRA convention) or the 3x3 conv (torchvision).
 *
 * @param network    network under construction
 * @param weightMap  weights keyed by name; reads lname + ".conv1/.conv2/.conv3/.shortcut" entries
 * @param lname      weight-name prefix for this block
 * @param input      block input tensor
 * @param in_channels          channels of `input`
 * @param bottleneck_channels  channels of the inner 1x1/3x3 convs
 * @param out_channels         channels produced by the block
 * @param stride     spatial stride of the block
 * @param dilation   dilation of the 3x3 conv (padding scales with it)
 * @param group_num  group count for all convs in the block
 * @return the tensor after the final ReLU
 */
ITensor* BottleneckBlock(INetworkDefinition *network,
                         std::map<std::string, Weights>& weightMap,
                         const std::string& lname,
                         ITensor& input,
                         int in_channels,
                         int bottleneck_channels,
                         int out_channels,
                         int stride = 1,
                         int dilation = 1,
                         int group_num = 1) {
    // Place the stride on exactly one of the two inner convolutions.
    const int s1x1 = STRIDE_IN_1X1 ? stride : 1;
    const int s3x3 = STRIDE_IN_1X1 ? 1 : stride;
    // Shortcut branch: strided 1x1 projection on channel change, else identity.
    ITensor* residual = &input;
    if (in_channels != out_channels) {
        auto proj = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
                                              weightMap[lname + ".shortcut.weight"],
                                              weightMap[lname + ".shortcut.bias"]);
        assert(proj);
        proj->setStrideNd(DimsHW{ stride, stride });
        proj->setNbGroups(group_num);
        residual = proj->getOutput(0);
    }
    // 1x1 reduce.
    IConvolutionLayer* c1 = network->addConvolutionNd(input, bottleneck_channels, DimsHW{ 1, 1 },
                                                      weightMap[lname + ".conv1.weight"],
                                                      weightMap[lname + ".conv1.bias"]);
    assert(c1);
    c1->setStrideNd(DimsHW{ s1x1, s1x1 });
    c1->setNbGroups(group_num);
    auto relu1 = network->addActivation(*c1->getOutput(0), ActivationType::kRELU);
    assert(relu1);
    // 3x3 transform; padding tracks dilation to preserve spatial size.
    IConvolutionLayer* c2 = network->addConvolutionNd(*relu1->getOutput(0), bottleneck_channels, DimsHW{ 3, 3 },
                                                      weightMap[lname + ".conv2.weight"],
                                                      weightMap[lname + ".conv2.bias"]);
    assert(c2);
    c2->setStrideNd(DimsHW{ s3x3, s3x3 });
    c2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
    c2->setDilationNd(DimsHW{ dilation, dilation });
    c2->setNbGroups(group_num);
    auto relu2 = network->addActivation(*c2->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    // 1x1 expand.
    IConvolutionLayer* c3 = network->addConvolutionNd(*relu2->getOutput(0), out_channels, DimsHW{ 1, 1 },
                                                      weightMap[lname + ".conv3.weight"],
                                                      weightMap[lname + ".conv3.bias"]);
    assert(c3);
    c3->setStrideNd(DimsHW{ 1, 1 });
    c3->setNbGroups(group_num);
    // Residual add followed by the output ReLU.
    auto sum = network->addElementWise(*c3->getOutput(0), *residual, ElementWiseOperation::kSUM);
    assert(sum);
    auto out_relu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
    assert(out_relu);
    return out_relu->getOutput(0);
}
/**
 * Stacks `stage` residual blocks into one ResNet stage. Only the first
 * block may change resolution (first_stride) and channel count; every
 * later block is stride-1 with in_channels == out_channels.
 *
 * @param network    network under construction
 * @param weightMap  weights keyed by name; block i reads lname + "." + i + ...
 * @param lname      weight-name prefix for the stage (e.g. "backbone.res2")
 * @param input      stage input tensor
 * @param stage      number of blocks to stack
 * @param resnet_type  selects BasicBlock (R18/R34) vs BottleneckBlock
 * @param in_channels          channels entering the first block
 * @param bottleneck_channels  inner channels (bottleneck variants only)
 * @param out_channels         channels produced by every block
 * @param first_stride  stride of the first block
 * @param dilation      dilation passed to bottleneck blocks
 * @return the tensor leaving the last block
 */
ITensor* MakeStage(INetworkDefinition *network,
                   std::map<std::string, Weights>& weightMap,
                   const std::string& lname,
                   ITensor& input,
                   int stage,
                   RESNETTYPE resnet_type,
                   int in_channels,
                   int bottleneck_channels,
                   int out_channels,
                   int first_stride = 1,
                   int dilation = 1) {
    const bool use_basic_block = (resnet_type == R18 || resnet_type == R34);
    ITensor* cur = &input;
    for (int blk = 0; blk < stage; blk++) {
        const std::string block_name = lname + "." + std::to_string(blk);
        const int stride = (blk == 0) ? first_stride : 1;
        cur = use_basic_block
                  ? BasicBlock(network, weightMap, block_name, *cur,
                               in_channels, out_channels, stride)
                  : BottleneckBlock(network, weightMap, block_name, *cur,
                                    in_channels, bottleneck_channels, out_channels,
                                    stride, dilation);
        // From the second block onward the input already has out_channels.
        in_channels = out_channels;
    }
    return cur;
}
/**
 * Builds a full ResNet backbone: stem followed by stages res2..res5.
 * Channel counts double at each stage; res5 may trade its stride for a
 * dilation of 2 (res5_dilation == 2).
 *
 * @param network    network under construction
 * @param weightMap  weights keyed by name under the "backbone." prefix
 * @param input      input image tensor
 * @param resnet_type        selects depth/block type (see RESNETTYPE)
 * @param stem_out_channels  channels produced by the stem (typically 64)
 * @param bottleneck_channels  inner channels of res2's bottlenecks (typically 64)
 * @param res2_out_channels  channels produced by res2 (64 for R18/R34, 256 otherwise)
 * @param res5_dilation      1 or 2; 2 keeps res5 at the res4 resolution
 * @return the res5 output tensor
 */
ITensor* BuildResNet(INetworkDefinition *network,
                     std::map<std::string, Weights>& weightMap,
                     ITensor& input,
                     RESNETTYPE resnet_type,
                     int stem_out_channels,
                     int bottleneck_channels,
                     int res2_out_channels,
                     int res5_dilation = 1) {
    assert(res5_dilation == 1 || res5_dilation == 2);  // res5_dilation must be 1 or 2
    if (resnet_type == R18 || resnet_type == R34) {
        assert(res2_out_channels == 64);  // BasicBlock nets keep 64 channels after res2
        assert(res5_dilation == 1);       // dilation not supported with BasicBlocks
    }
    int out_channels = res2_out_channels;
    ITensor* out = nullptr;
    // stem
    auto stem = BasicStem(network, weightMap, "backbone.stem", input, stem_out_channels);
    out = stem->getOutput(0);
    // res2..res5 — four stages, matching the 4-entry vectors in
    // num_blocks_per_stage. BUG FIX: this loop previously ran `i < 3`,
    // which skipped res5 entirely and left the (i == 3) dilation/stride
    // handling below as dead code.
    for (int i = 0; i < 4; i++) {
        int dilation = (i == 3) ? res5_dilation : 1;
        // res2 keeps the stem's resolution; later stages downsample by 2,
        // except a dilated res5, which trades its stride for dilation.
        int first_stride = (i == 0 || (i == 3 && dilation == 2)) ? 1 : 2;
        out = MakeStage(network, weightMap,
                        "backbone.res" + std::to_string(i + 2), *out,
                        num_blocks_per_stage.at(resnet_type)[i], resnet_type,
                        stem_out_channels, bottleneck_channels, out_channels,
                        first_stride, dilation);
        // Next stage's input channels are this stage's output channels;
        // inner and output widths double per stage.
        stem_out_channels = out_channels;
        bottleneck_channels *= 2;
        out_channels *= 2;
    }
    return out;
}