// yolo_standard_libray/tensorrtx-master/detr/backbone.hpp
#pragma once
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "common.hpp"
enum RESNETTYPE {
R18 = 0,
R34,
R50,
R101,
R152
};
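// Number of residual blocks in each of the four stages (layer1-layer4) for every supported depth.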
const std::map<RESNETTYPE, std::vector<int>> num_blocks_per_stage = {
{R18, {2, 2, 2, 2}},
{R34, {3, 4, 6, 3}},
{R50, {3, 4, 6, 3}},
{R101, {3, 4, 23, 3}},
{R152, {3, 8, 36, 3}}
};
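// Adds a frozen BatchNorm2d as an IScaleLayer: out = (in * scale + shift) ^ power, where
// scale = gamma / sqrt(var + eps), shift = beta - mean * gamma / sqrt(var + eps), power = 1.
// The folded buffers are stored back into weightMap so they stay alive until the engine is built.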
IScaleLayer* addBatchNorm2d(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
ITensor& input,
const std::string& lname,
float eps = 1e-5
) {
float *gamma = (float*)(weightMap[lname + ".weight"].values);
float *beta = (float*)(weightMap[lname + ".bias"].values);
float *mean = (float*)(weightMap[lname + ".running_mean"].values);
float *var = (float*)(weightMap[lname + ".running_var"].values);
int len = weightMap[lname + ".running_var"].count;
float *scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
scval[i] = gamma[i] / sqrt(var[i] + eps);
}
Weights scale{ DataType::kFLOAT, scval, len };
float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
}
Weights shift{ DataType::kFLOAT, shval, len };
float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
pval[i] = 1.0;
}
Weights power{ DataType::kFLOAT, pval, len };
weightMap[lname + ".scale"] = scale;
weightMap[lname + ".shift"] = shift;
weightMap[lname + ".power"] = power;
IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
assert(scale_1);
return scale_1;
}
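// ResNet stem: 7x7 stride-2 convolution + BN + ReLU followed by a 3x3 stride-2 max pool
// (overall stride 4). Weights are looked up under lname + ".conv1" / ".bn1".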
ILayer* BasicStem(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
const std::string& lname,
ITensor& input,
int out_channels,
int group_num = 1
) {
// conv1
Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
IConvolutionLayer* conv1 = network->addConvolutionNd(
input,
out_channels,
DimsHW{ 7, 7 },
weightMap[lname + ".conv1.weight"],
emptywts);
assert(conv1);
conv1->setStrideNd(DimsHW{ 2, 2 });
conv1->setPaddingNd(DimsHW{ 3, 3 });
conv1->setNbGroups(group_num);
auto bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn1");
assert(bn1);
auto r1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
assert(r1);
auto max_pool2d = network->addPoolingNd(*r1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
assert(max_pool2d);
max_pool2d->setStrideNd(DimsHW{ 2, 2 });
max_pool2d->setPaddingNd(DimsHW{ 1, 1 });
return max_pool2d;
}
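// Basic residual block (ResNet-18/34): two 3x3 convolutions with a ReLU in between, plus a
// 1x1 projection shortcut whenever in_channels != out_channels.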
ITensor* BasicBlock(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
const std::string& lname,
ITensor& input,
int in_channels,
int out_channels,
int stride = 1
) {
// conv1
IConvolutionLayer* conv1 = network->addConvolutionNd(
input,
out_channels,
DimsHW{ 3, 3 },
weightMap[lname + ".conv1.weight"],
weightMap[lname + ".conv1.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{ stride, stride });
conv1->setPaddingNd(DimsHW{ 1, 1 });
auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(r1);
// conv2
IConvolutionLayer* conv2 = network->addConvolutionNd(
*r1->getOutput(0),
out_channels, DimsHW{ 3, 3 },
weightMap[lname + ".conv2.weight"],
weightMap[lname + ".conv2.bias"]);
assert(conv2);
conv2->setStrideNd(DimsHW{ 1, 1 });
conv2->setPaddingNd(DimsHW{ 1, 1 });
// shortcut
ITensor* shortcut_value = nullptr;
if (in_channels != out_channels) {
auto shortcut = network->addConvolutionNd(
input,
out_channels,
DimsHW{ 1, 1 },
weightMap[lname + ".shortcut.weight"],
weightMap[lname + ".shortcut.bias"]);
assert(shortcut);
shortcut->setStrideNd(DimsHW{ stride, stride });
shortcut_value = shortcut->getOutput(0);
} else {
shortcut_value = &input;
}
// add
auto ew = network->addElementWise(*conv2->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
assert(ew);
auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
assert(r3);
return r3->getOutput(0);
}
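// Bottleneck residual block (ResNet-50/101/152): 1x1 reduce -> 3x3 (stride/dilation) -> 1x1 expand,
// each followed by BN, with a 1x1 "downsample" projection when in_channels != out_channels.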
ITensor* BottleneckBlock(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
const std::string& lname,
ITensor& input,
int in_channels,
int bottleneck_channels,
int out_channels,
int stride = 1,
int dilation = 1,
int group_num = 1
) {
Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
// conv1
IConvolutionLayer* conv1 = network->addConvolutionNd(
input,
bottleneck_channels,
DimsHW{ 1, 1 },
weightMap[lname + ".conv1.weight"],
emptywts);
assert(conv1);
conv1->setStrideNd(DimsHW{ 1, 1 });
conv1->setNbGroups(group_num);
auto bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn1");
assert(bn1);
auto r1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
assert(r1);
// conv2
IConvolutionLayer* conv2 = network->addConvolutionNd(
*r1->getOutput(0),
bottleneck_channels,
DimsHW{ 3, 3 },
weightMap[lname + ".conv2.weight"],
emptywts);
assert(conv2);
conv2->setStrideNd(DimsHW{ stride, stride });
conv2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
conv2->setDilationNd(DimsHW{ dilation, dilation });
conv2->setNbGroups(group_num);
auto bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + ".bn2");
assert(bn2);
auto r2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
assert(r2);
// conv3
IConvolutionLayer* conv3 = network->addConvolutionNd(
*r2->getOutput(0),
out_channels,
DimsHW{ 1, 1 },
weightMap[lname + ".conv3.weight"],
emptywts);
assert(conv3);
conv3->setStrideNd(DimsHW{ 1, 1 });
conv3->setNbGroups(group_num);
auto bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + ".bn3");
assert(bn3);
// shortcut
ITensor* shortcut_value = nullptr;
if (in_channels != out_channels) {
auto shortcut = network->addConvolutionNd(
input,
out_channels,
DimsHW{ 1, 1 },
weightMap[lname + ".downsample.0.weight"],
emptywts);
assert(shortcut);
shortcut->setStrideNd(DimsHW{stride, stride});
shortcut->setNbGroups(group_num);
auto shortcut_bn = addBatchNorm2d(network, weightMap, *shortcut->getOutput(0), lname + ".downsample.1");
assert(shortcut_bn);
shortcut_value = shortcut_bn->getOutput(0);
} else {
shortcut_value = &input;
}
// add
auto ew = network->addElementWise(*bn3->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
assert(ew);
auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
assert(r3);
return r3->getOutput(0);
}
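// Stacks the requested number of residual blocks into one ResNet stage; only the first block
// uses first_stride, all remaining blocks use stride 1.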
ITensor* MakeStage(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
const std::string& lname,
ITensor& input,
int num_blocks,
RESNETTYPE resnet_type,
int in_channels,
int bottleneck_channels,
int out_channels,
int first_stride = 1,
int dilation = 1
) {
ITensor* out = &input;
for (int i = 0; i < num_blocks; i++) {
std::string layerName = lname + "." + std::to_string(i);
int stride = i == 0 ? first_stride : 1;
if (resnet_type == R18 || resnet_type == R34)
out = BasicBlock(network, weightMap, layerName, *out, in_channels, out_channels, stride);
else
out = BottleneckBlock(
network,
weightMap,
layerName,
*out,
in_channels,
bottleneck_channels,
out_channels,
stride,
dilation);
in_channels = out_channels;
}
return out;
}
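// Builds the full ResNet backbone (stem + layer1..layer4) and returns the final feature map.
// Weight names follow the DETR checkpoint layout, e.g. "backbone.0.body.layer1.0.conv1.weight".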
ITensor* BuildResNet(
INetworkDefinition *network,
std::unordered_map<std::string, Weights>& weightMap,
ITensor& input,
RESNETTYPE resnet_type,
int stem_out_channels,
int bottleneck_channels,
int res2_out_channels,
int res5_dilation = 1
) {
assert(res5_dilation == 1 || res5_dilation == 2);  // res5_dilation must be 1 or 2
if (resnet_type == R18 || resnet_type == R34) {
assert(res2_out_channels == 64);  // res2_out_channels must be 64 for R18/R34
assert(res5_dilation == 1);  // res5_dilation must be 1 for R18/R34
}
int out_channels = res2_out_channels;
ITensor* out = nullptr;
// stem
auto stem = BasicStem(network, weightMap, "backbone.0.body", input, stem_out_channels);
out = stem->getOutput(0);
// res
for (int i = 0; i < 4; i++) {
int dilation = (i == 3) ? res5_dilation : 1;
int first_stride = (i == 0 || (i == 3 && dilation == 2)) ? 1 : 2;
out = MakeStage(
network,
weightMap,
"backbone.0.body.layer" + std::to_string(i + 1),
*out,
num_blocks_per_stage.at(resnet_type)[i],
resnet_type,
stem_out_channels,
bottleneck_channels,
out_channels,
first_stride,
dilation);
stem_out_channels = out_channels;
bottleneck_channels *= 2;
out_channels *= 2;
}
return out;
}
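// Illustrative usage sketch (not part of the original header): building a ResNet-50 backbone.
// The channel settings (stem 64, bottleneck 64, res2 out 256) are the standard ResNet-50 values,
// and loadWeights(), INPUT_H and INPUT_W are assumed to come from the surrounding project.
//
//   std::unordered_map<std::string, Weights> weightMap = loadWeights("./detr.wts");
//   ITensor* data = network->addInput("data", DataType::kFLOAT, Dims3{ 3, INPUT_H, INPUT_W });
//   ITensor* features = BuildResNet(network, weightMap, *data, R50,
//                                   64,    // stem_out_channels
//                                   64,    // bottleneck_channels
//                                   256);  // res2_out_channels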