重新转换了模型,解决了RKNN输出配置错误
This commit is contained in:
parent
46fcb71a70
commit
806cff694f
18
Readme.md
18
Readme.md
@ -1150,3 +1150,21 @@ rsync -avz build-cross/ user@board_ip:/opt/media-server/
|
||||
- 中控只维护抽象图,实际在多台 RK3588 设备上分布执行。
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
## 附录:问题修复与调试文档
|
||||
|
||||
### 已知问题修复
|
||||
|
||||
| 问题 | 文档 | 日期 |
|
||||
|------|------|------|
|
||||
| YOLOv8 FP16 数据解析错误 | [docs/bugfix_yolov8_fp16.md](docs/bugfix_yolov8_fp16.md) | 2025-02 |
|
||||
| RKNN 输出格式配置错误 | [docs/fix_rknn_output_format.md](docs/fix_rknn_output_format.md) | 2026-02 |
|
||||
|
||||
### 调试与开发文档
|
||||
|
||||
- [docs/models.md](docs/models.md) - 模型转换与部署指南
|
||||
- [docs/Agent_API_Extensions.md](docs/Agent_API_Extensions.md) - Agent API 扩展说明
|
||||
- [docs/API_Device_RemoteMgmt_InterfaceTable.md](docs/API_Device_RemoteMgmt_InterfaceTable.md) - 远程管理接口
|
||||
|
||||
|
||||
Binary file not shown.
@ -46,9 +46,13 @@
|
||||
"model_w": 768,
|
||||
"model_h": 768,
|
||||
"num_classes": 11,
|
||||
"conf": 0.35,
|
||||
"conf": 0.2,
|
||||
"nms": 0.45,
|
||||
"class_filter": [3, 6]
|
||||
"debug": {
|
||||
"stats": true,
|
||||
"stats_interval": 30,
|
||||
"detections": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "face_det_cam1",
|
||||
|
||||
@ -14,150 +14,236 @@
|
||||
- 坐标值为超大异常数字
|
||||
- 检测分数为 `score=5561747627709562880.000000`(溢出值)
|
||||
|
||||
## 根因分析
|
||||
---
|
||||
|
||||
### 1. 数据类型不匹配
|
||||
## 修复历史
|
||||
|
||||
RKNN 模型的输出数据类型与代码解析方式不匹配:
|
||||
### 修复 #1: FP16 到 FP32 转换 (2025-02-26)
|
||||
|
||||
| 模型 | RKNN 输出类型 | 代码原处理方式 | 结果 |
|
||||
|------|--------------|---------------|------|
|
||||
| YOLOv5 | INT8 (量化) | `int8_t*` + 反量化 | ✅ 正常 |
|
||||
| YOLOv8 | FP16 (半精度) | `reinterpret_cast<float*>` | ❌ 错误 |
|
||||
**问题**: FP16 数据被错误地当作 FP32 解析
|
||||
|
||||
**问题代码:**
|
||||
```cpp
|
||||
// ai_yolo_node.cpp 第 591-592 行(修复前)
|
||||
if (outputs[0].type == RKNN_TENSOR_FLOAT32 ||
|
||||
outputs[0].type == RKNN_TENSOR_FLOAT16) {
|
||||
// 两者都按 float32 解析,导致 FP16 数据被错误解析
|
||||
valid_count = ProcessOutputV8(reinterpret_cast<float*>(...), ...);
|
||||
}
|
||||
**解决**: 添加 `Fp16ToFp32` 转换函数,单独处理 FP16 分支
|
||||
|
||||
**文件**: `plugins/ai_yolo/ai_yolo_node.cpp`
|
||||
|
||||
### 修复 #2: RKNN 输出格式配置 (2025-02-28)
|
||||
|
||||
**问题**: `ai_scheduler` 中 RKNN 输出配置错误导致数据类型混乱
|
||||
|
||||
**详细说明见下方**
|
||||
|
||||
---
|
||||
|
||||
## 修复 #2 详细说明
|
||||
|
||||
### 问题现象
|
||||
|
||||
使用新转换的模型 `best-rk3588.rknn` 时出现:
|
||||
|
||||
```
|
||||
[ai_yolo] raw box i=0 cx=inf cy=inf w=inf h=inf score=0.998047 cls=0
|
||||
[ai_yolo] det: cls=0 score=0.998047 bbox=(0.000000,0.000000,768.000000,768.000000)
|
||||
```
|
||||
|
||||
### 2. 为什么 FP16 不能直接当 FP32 解析
|
||||
- 所有检测分数相同(0.998047)
|
||||
- 坐标包含 `inf` 或全图
|
||||
- 或完全没有检测(`valid_count=0`)
|
||||
|
||||
- **FP16** (半精度浮点): 16位 = 1位符号 + 5位指数 + 10位尾数
|
||||
- **FP32** (单精度浮点): 32位 = 1位符号 + 8位指数 + 23位尾数
|
||||
### 根本原因
|
||||
|
||||
直接内存解释为 FP32 时,两个 FP16 数值会被错误地合并成一个 FP32,导致数据完全错乱。
|
||||
`src/ai_scheduler.cpp` 中存在三个配置错误:
|
||||
|
||||
### 3. 为什么 YOLOv5 正常
|
||||
|
||||
YOLOv5 RKNN 模型默认使用 INT8 量化,代码本来就有反量化逻辑:
|
||||
#### 1. 使用 INT8 原始输出
|
||||
```cpp
|
||||
DequantizeAffineToF32(int8_t qnt, int32_t zp, float scale)
|
||||
// 错误代码 (line 620)
|
||||
outputs[i].want_float = 0; // 获取 INT8 量化数据
|
||||
```
|
||||
|
||||
而 YOLOv8 RKNN 模型默认使用 FP16,代码缺乏 FP16→FP32 转换。
|
||||
|
||||
## 解决方法
|
||||
|
||||
### 1. 添加 FP16 到 FP32 转换函数
|
||||
|
||||
#### 2. 硬编码输出类型
|
||||
```cpp
|
||||
// ai_yolo_node.cpp
|
||||
// FP16 (half) to FP32 conversion
|
||||
// IEEE 754 half-precision: 1 sign bit, 5 exponent bits, 10 mantissa bits
|
||||
inline float Fp16ToFp32(uint16_t h) {
|
||||
uint32_t sign = (h >> 15) & 0x1;
|
||||
uint32_t exp = (h >> 10) & 0x1F;
|
||||
uint32_t mant = h & 0x3FF;
|
||||
|
||||
uint32_t f;
|
||||
if (exp == 0) {
|
||||
// Zero or subnormal
|
||||
if (mant == 0) {
|
||||
f = (sign << 31); // Signed zero
|
||||
} else {
|
||||
// Subnormal: convert to normal
|
||||
exp = 1;
|
||||
while ((mant & 0x400) == 0) {
|
||||
mant <<= 1;
|
||||
exp--;
|
||||
}
|
||||
mant &= 0x3FF;
|
||||
f = (sign << 31) | ((exp + 112) << 23) | (mant << 13);
|
||||
}
|
||||
} else if (exp == 0x1F) {
|
||||
// Infinity or NaN
|
||||
f = (sign << 31) | (0xFF << 23) | (mant << 13);
|
||||
} else {
|
||||
// Normal number
|
||||
f = (sign << 31) | ((exp + 112) << 23) | (mant << 13);
|
||||
}
|
||||
|
||||
float result;
|
||||
memcpy(&result, &f, sizeof(float));
|
||||
return result;
|
||||
}
|
||||
// 错误代码 (line 647)
|
||||
out.type = RKNN_TENSOR_FLOAT16; // 硬编码,与实际不符
|
||||
```
|
||||
|
||||
### 2. 单独处理 FP16 分支
|
||||
|
||||
#### 3. 缓冲区大小不足
|
||||
```cpp
|
||||
// ai_yolo_node.cpp PostProcessBorrowed() 函数
|
||||
if (outputs[0].type == RKNN_TENSOR_FLOAT32) {
|
||||
// FP32 直接解析
|
||||
valid_count = ProcessOutputV8(reinterpret_cast<float*>(...), ...);
|
||||
} else if (outputs[0].type == RKNN_TENSOR_FLOAT16) {
|
||||
// FP16 先转换到 FP32 缓冲区
|
||||
size_t num_elements = outputs[0].size / sizeof(uint16_t);
|
||||
fp32_buffer_.resize(num_elements);
|
||||
const uint16_t* fp16_data = reinterpret_cast<const uint16_t*>(outputs[0].data);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
fp32_buffer_[i] = Fp16ToFp32(fp16_data[i]);
|
||||
}
|
||||
valid_count = ProcessOutputV8(fp32_buffer_.data(), ...);
|
||||
} else {
|
||||
// INT8 反量化
|
||||
valid_count = ProcessOutputV8Int8(reinterpret_cast<int8_t*>(...), ...);
|
||||
}
|
||||
// 错误代码 (line 220)
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(uint16_t); // 仅 2字节/元素
|
||||
```
|
||||
|
||||
### 3. 添加 FP32 缓冲区成员变量
|
||||
### 为什么会导致 0.998047
|
||||
|
||||
- `0.998047` ≈ FP16 值 `0x3BFF` 转换为 FP32 的结果
|
||||
- 当 INT8 数据被错误解析为 FP16 时,数值被错误解释
|
||||
- 由于所有 class scores 被相同方式解析,导致分数统一
|
||||
|
||||
### 解决方法
|
||||
|
||||
#### 修改 1: 启用 FP32 输出
|
||||
```cpp
|
||||
// src/ai_scheduler.cpp line 620
|
||||
outputs[i].want_float = 1; // RKNN 自动完成反量化,返回 FP32
|
||||
```
|
||||
|
||||
#### 修改 2: 正确标记输出类型
|
||||
```cpp
|
||||
// src/ai_scheduler.cpp line 647
|
||||
out.type = RKNN_TENSOR_FLOAT32; // 与 want_float=1 对应
|
||||
```
|
||||
|
||||
#### 修改 3: 分配正确大小的缓冲区
|
||||
```cpp
|
||||
// src/ai_scheduler.cpp line 220
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(float); // 4字节/元素
|
||||
```
|
||||
|
||||
### 修复后的效果
|
||||
|
||||
```
|
||||
[ALARM][info] detections=[{cls=6 score=0.53 bbox=(129,344,427,407)}]
|
||||
[ALARM][info] detections=[{cls=6 score=0.43 bbox=(73,316,382,452)}]
|
||||
[ALARM][info] detections=[{cls=6 score=0.52 bbox=(108,315,299,453)}]
|
||||
```
|
||||
|
||||
| 指标 | 修复前 | 修复后 |
|
||||
|------|--------|--------|
|
||||
| 分数 | 固定 0.998047 | 0.43 ~ 0.53 变化 |
|
||||
| 坐标 | inf 或全图 | 合理的 bbox |
|
||||
| 检测 | 异常 | 稳定有效 |
|
||||
|
||||
---
|
||||
|
||||
## RKNN 输出类型参考
|
||||
|
||||
| `want_float` | 返回值类型 | 说明 |
|
||||
|-------------|-----------|------|
|
||||
| 0 | INT8 (原始量化值) | 需要手动反量化: `(val - zp) * scale` |
|
||||
| 1 | FP32 | RKNN 自动反量化,可直接使用 |
|
||||
|
||||
**注意**: `want_float=1` 时,无论模型内部是 INT8/FP16/FP32,RKNN 都返回 FP32。
|
||||
|
||||
---
|
||||
|
||||
## 完整修复代码 (src/ai_scheduler.cpp)
|
||||
|
||||
```cpp
|
||||
class AiYoloNode : public INode {
|
||||
// 1. 请求 FP32 输出 (line 620)
|
||||
for (uint32_t i = 0; i < ctx->n_output; ++i) {
|
||||
outputs[i].want_float = 1; // 修改: 原来是 0
|
||||
outputs[i].index = i;
|
||||
// ...
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
ModelHandle model_handle_ = kInvalidModelHandle;
|
||||
uint32_t n_output_ = 0;
|
||||
std::vector<uint8_t> rgb_tmp_;
|
||||
std::vector<float> fp32_buffer_; // For FP16 to FP32 conversion
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
// 2. 标记正确的输出类型 (line 647)
|
||||
for (uint32_t i = 0; i < ctx->n_output; ++i) {
|
||||
auto& out = result.outputs[i];
|
||||
out.index = static_cast<int>(i);
|
||||
out.size = outputs[i].size;
|
||||
out.data = reinterpret_cast<const uint8_t*>(outputs[i].buf);
|
||||
out.type = RKNN_TENSOR_FLOAT32; // 修改: 原来是 FLOAT16
|
||||
out.zp = ctx->output_attrs[i].zp;
|
||||
out.scale = ctx->output_attrs[i].scale;
|
||||
// ...
|
||||
}
|
||||
|
||||
// 3. 分配 FP32 缓冲区 (line 220)
|
||||
for (uint32_t j = 0; j < ctx->n_output; ++j) {
|
||||
// FP32 output: 4 bytes per element
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(float); // 修改: 原来是 uint16_t
|
||||
if (out_sz > 0) {
|
||||
ctx->output_buffers[j].resize(out_sz);
|
||||
} else {
|
||||
ctx->output_buffers[j].clear();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 修复后的验证
|
||||
---
|
||||
|
||||
**正常日志:**
|
||||
```
|
||||
[ai_yolo] First box: x=4.632812, y=6.921875, w=11.078125, h=14.296875
|
||||
[ai_yolo] ProcessOutputV8 result: valid_count=23 out of 8400 boxes
|
||||
[tracker] id=trk_cam1 tracks=1 created=1 removed=0 matched=153 unmatch_det=1
|
||||
## 模型转换建议
|
||||
|
||||
### PC 端导出 ONNX
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
|
||||
model = YOLO('best.pt')
|
||||
model.export(
|
||||
format='onnx',
|
||||
imgsz=768,
|
||||
opset=12,
|
||||
simplify=True,
|
||||
dynamic=False,
|
||||
)
|
||||
```
|
||||
|
||||
- 坐标值在正常范围 (0-640)
|
||||
- 检测数量合理 (23/8400)
|
||||
- 跟踪器正常工作 (tracks=1)
|
||||
- 检测分数正常 (0.67)
|
||||
### PC 端转换为 RKNN
|
||||
```python
|
||||
from rknn.api import RKNN
|
||||
|
||||
rknn = RKNN()
|
||||
rknn.config(
|
||||
target_platform='rk3588',
|
||||
mean_values=[[0.0, 0.0, 0.0]], # 归一化到 [0,1]
|
||||
std_values=[[255.0, 255.0, 255.0]],
|
||||
)
|
||||
|
||||
rknn.load_onnx(
|
||||
model='best.onnx',
|
||||
input_size_list=[[1, 3, 768, 768]] # NCHW
|
||||
)
|
||||
|
||||
rknn.build(do_quantization=False) # FP16 测试
|
||||
rknn.export_rknn('best-rk3588.rknn')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 调试技巧
|
||||
|
||||
### 验证模型输出
|
||||
```python
|
||||
from rknnlite.api import RKNNLite
|
||||
import numpy as np
|
||||
|
||||
rknn = RKNNLite()
|
||||
rknn.load_rknn('best-rk3588.rknn')
|
||||
rknn.init_runtime()
|
||||
|
||||
input_data = np.ones((1, 3, 768, 768), dtype=np.float32) * 128
|
||||
outputs = rknn.inference(inputs=[input_data])
|
||||
|
||||
print(f"Shape: {outputs[0].shape}") # 应为 (1, 15, 12096)
|
||||
print(f"Min/Max: {outputs[0].min():.4f} / {outputs[0].max():.4f}") # 应无 inf
|
||||
print(f"Values: {outputs[0].flatten()[:10]}") # 应无 0.998047 重复
|
||||
```
|
||||
|
||||
### 检查数据类型
|
||||
```cpp
|
||||
// 在 ai_yolo_node.cpp 中添加
|
||||
LogInfo("[ai_yolo] output type=" + std::to_string(outputs[0].type) +
|
||||
" size=" + std::to_string(outputs[0].size));
|
||||
// 期望: type=0 (FLOAT32), size=725760 (对于 15*12096 FP32)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 相关文件
|
||||
|
||||
- `plugins/ai_yolo/ai_yolo_node.cpp`
|
||||
| 文件 | 说明 |
|
||||
|------|------|
|
||||
| `src/ai_scheduler.cpp` | RKNN 推理核心,本次修复的主要文件 |
|
||||
| `plugins/ai_yolo/ai_yolo_node.cpp` | YOLO 后处理,包含 FP16→FP32 转换 |
|
||||
| `include/ai_scheduler.h` | `InferOutput` 结构定义 |
|
||||
|
||||
## 影响范围
|
||||
|
||||
- ✅ YOLOv5 (INT8): 不受影响,继续正常工作
|
||||
- ✅ YOLOv8 (FP16): 修复后正常工作
|
||||
- ✅ YOLOv8 (FP32): 不受影响
|
||||
- ✅ YOLOv8 (INT8): 不受影响
|
||||
---
|
||||
|
||||
## 参考
|
||||
|
||||
- RKNN API 数据类型定义: `rknn_tensor_type` in `rknn_api.h`
|
||||
- `RKNN_TENSOR_FLOAT32 = 0`
|
||||
- `RKNN_TENSOR_FLOAT16 = 1`
|
||||
- `RKNN_TENSOR_INT8 = 2`
|
||||
- IEEE 754 半精度浮点标准
|
||||
- [RKNN Toolkit2 文档](https://github.com/rockchip-linux/rknn-toolkit2)
|
||||
- [RK3588 NPU 快速入门](https://wiki.t-firefly.com/en/ROC-RK3588-PC/rockchip_npu.html)
|
||||
- `rknn_api.h` 中的数据类型定义
|
||||
|
||||
---
|
||||
|
||||
**修复日期**: 2025-02-28
|
||||
**文档更新**: 2026-02-28
|
||||
|
||||
181
docs/fix_rknn_output_format.md
Normal file
181
docs/fix_rknn_output_format.md
Normal file
@ -0,0 +1,181 @@
|
||||
# RK3588 YOLOv8 模型输出格式修复记录
|
||||
|
||||
## 问题现象
|
||||
|
||||
使用新转换的 RKNN 模型 (`best-rk3588.rknn`) 进行推理时,出现以下异常现象:
|
||||
|
||||
1. **所有检测框的置信度分数相同** - 均为 `0.998047`
|
||||
2. **框坐标异常** - 大量框为全图 `(0,0,768,768)` 或包含 `inf` 值
|
||||
3. **部分帧无检测** - `valid_count=0`,`global_max_score=0.000580`
|
||||
|
||||
### 错误日志示例
|
||||
```
|
||||
[ai_yolo] raw box i=0 cx=inf cy=inf w=inf h=inf score=0.998047 cls=0
|
||||
[ai_yolo] raw box i=1 cx=inf cy=inf w=inf h=inf score=0.998047 cls=0
|
||||
[ai_yolo] det: cls=0 score=0.998047 bbox=(0.000000,0.000000,768.000000,768.000000)
|
||||
```
|
||||
|
||||
## 根本原因
|
||||
|
||||
### 1. RKNN 输出配置错误 (`src/ai_scheduler.cpp`)
|
||||
|
||||
原代码存在三个关键问题:
|
||||
|
||||
#### 问题 A: 使用 INT8 原始输出
|
||||
```cpp
|
||||
// 错误代码
|
||||
outputs[i].want_float = 0; // 获取 INT8 量化后的原始数据
|
||||
```
|
||||
|
||||
当 `want_float=0` 时,RKNN 返回 INT8 量化数据,但代码错误地将其当作 FP16 解析。
|
||||
|
||||
#### 问题 B: 硬编码输出类型
|
||||
```cpp
|
||||
// 错误代码
|
||||
out.type = RKNN_TENSOR_FLOAT16; // 硬编码为 FP16,与实际数据类型不符
|
||||
```
|
||||
|
||||
实际获取的是 INT8 数据(`type=2`),但标记为 FP16(`type=1`),导致后续解析逻辑错误。
|
||||
|
||||
#### 问题 C: 缓冲区大小不足
|
||||
```cpp
|
||||
// 错误代码
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(uint16_t); // 只分配 2字节/元素
|
||||
```
|
||||
|
||||
当 `want_float=1` 时,RKNN 返回 FP32(4字节/元素),但缓冲区只分配了 FP16 大小(2字节/元素),导致数据截断。
|
||||
|
||||
### 2. 数值解释
|
||||
|
||||
`0.998047` 是 FP16 值 `0x3BFF` 转换为 FP32 后的结果:
|
||||
- FP16: `0x3BFF` = 0.99951171875
|
||||
- 由于所有 class scores 被错误解析为相同的 FP16 值,导致所有检测分数相同
|
||||
|
||||
## 解决方法
|
||||
|
||||
### 修复 1: 启用 FP32 输出 (ai_scheduler.cpp:620)
|
||||
|
||||
```cpp
|
||||
// 修改前
|
||||
outputs[i].want_float = 0;
|
||||
|
||||
// 修改后
|
||||
outputs[i].want_float = 1; // 请求 FP32 输出,RKNN 自动完成反量化
|
||||
```
|
||||
|
||||
### 修复 2: 正确标记输出类型 (ai_scheduler.cpp:647)
|
||||
|
||||
```cpp
|
||||
// 修改前
|
||||
out.type = RKNN_TENSOR_FLOAT16;
|
||||
|
||||
// 修改后
|
||||
out.type = RKNN_TENSOR_FLOAT32; // 与 want_float=1 对应
|
||||
```
|
||||
|
||||
### 修复 3: 分配正确大小的缓冲区 (ai_scheduler.cpp:220)
|
||||
|
||||
```cpp
|
||||
// 修改前
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(uint16_t);
|
||||
|
||||
// 修改后
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(float); // 4字节/元素
|
||||
```
|
||||
|
||||
## 修复后的效果
|
||||
|
||||
### 正常检测输出
|
||||
```
|
||||
[ALARM][info] detections=[{cls=6 score=0.53 bbox=(129,344,427,407)}]
|
||||
[ALARM][info] detections=[{cls=6 score=0.43 bbox=(73,316,382,452)}]
|
||||
[ALARM][info] detections=[{cls=6 score=0.52 bbox=(108,315,299,453)}]
|
||||
```
|
||||
|
||||
### 关键改进
|
||||
| 指标 | 修复前 | 修复后 |
|
||||
|------|--------|--------|
|
||||
| 分数分布 | 固定 0.998047 | 0.43 ~ 0.53 变化 |
|
||||
| 坐标范围 | 全图或 inf | 合理的 (x,y,w,h) |
|
||||
| 有效检测 | valid_count=0 或异常 | 稳定的有效检测 |
|
||||
|
||||
## 模型转换注意事项
|
||||
|
||||
### PC 端导出 ONNX
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
|
||||
model = YOLO('best.pt')
|
||||
model.export(
|
||||
format='onnx',
|
||||
imgsz=768,
|
||||
opset=12,
|
||||
simplify=True,
|
||||
dynamic=False,
|
||||
)
|
||||
```
|
||||
|
||||
### PC 端转换为 RKNN
|
||||
```python
|
||||
from rknn.api import RKNN
|
||||
|
||||
rknn = RKNN()
|
||||
rknn.config(
|
||||
target_platform='rk3588',
|
||||
mean_values=[[0.0, 0.0, 0.0]],
|
||||
std_values=[[255.0, 255.0, 255.0]],
|
||||
)
|
||||
|
||||
rknn.load_onnx(
|
||||
model='best.onnx',
|
||||
input_size_list=[[1, 3, 768, 768]] # NCHW 格式
|
||||
)
|
||||
|
||||
rknn.build(do_quantization=False) # 先测试 FP16
|
||||
rknn.export_rknn('best-rk3588.rknn')
|
||||
```
|
||||
|
||||
## 调试技巧
|
||||
|
||||
### 1. 验证模型输出
|
||||
```python
|
||||
from rknnlite.api import RKNNLite
|
||||
import numpy as np
|
||||
|
||||
rknn = RKNNLite()
|
||||
rknn.load_rknn('best-rk3588.rknn')
|
||||
rknn.init_runtime()
|
||||
|
||||
input_data = np.ones((1, 3, 768, 768), dtype=np.float32) * 128
|
||||
outputs = rknn.inference(inputs=[input_data])
|
||||
|
||||
print(f"Output shape: {outputs[0].shape}")
|
||||
print(f"Min/Max: {outputs[0].min():.4f} / {outputs[0].max():.4f}")
|
||||
print(f"First 10 values: {outputs[0].flatten()[:10]}")
|
||||
```
|
||||
|
||||
### 2. 检查输出类型
|
||||
```cpp
|
||||
// 在 ai_scheduler.cpp 中添加日志
|
||||
LogInfo("[ai_scheduler] output[" + std::to_string(i) + "] type=" +
|
||||
std::to_string(ctx->output_attrs[i].type) +
|
||||
" qnt_type=" + std::to_string(ctx->output_attrs[i].qnt_type));
|
||||
```
|
||||
|
||||
## 相关文件
|
||||
|
||||
- `src/ai_scheduler.cpp` - RKNN 推理核心逻辑
|
||||
- `plugins/ai_yolo/ai_yolo_node.cpp` - YOLO 后处理
|
||||
- `include/ai_scheduler.h` - 数据结构定义
|
||||
|
||||
## 参考文档
|
||||
|
||||
- [RKNN Toolkit2 文档](https://github.com/rockchip-linux/rknn-toolkit2)
|
||||
- [RK3588 NPU 用户手册](https://www.rock-chips.com/uploads/pdf/RK3588%20NPU%20User%20Manual.pdf)
|
||||
- [YOLOv8 RKNN 部署指南](https://github.com/ultralytics/ultralytics)
|
||||
|
||||
---
|
||||
|
||||
**修复日期**: 2026-02-28
|
||||
**修复者**: AI Assistant
|
||||
**测试模型**: `models/best-rk3588.rknn`
|
||||
@ -55,3 +55,19 @@ watch -n 1 'grep -E "fps_calc|RKVENC" /proc/mpp_service/sessions-summary | head
|
||||
|
||||
# 综合监控(运行脚本)
|
||||
~/apps/OrangePi3588Media/scripts/monitor_hw.sh
|
||||
|
||||
|
||||
- 运行media-server
|
||||
./build/media-server -c configs/sample_cam4_best.json
|
||||
|
||||
- PT模型转RKNN
|
||||
|
||||
在Linux系统上,先克隆RKNN修改后的项目到本地:
|
||||
https://gitcode.com/GitHub_Trending/ul/ultralytics
|
||||
|
||||
安装依赖:
|
||||
pip install -e . rknn-toolkit2
|
||||
pip install "onnx==1.16.1"
|
||||
|
||||
进入模型目录,执行:
|
||||
yolo export model=best.pt format=rknn name=rk3588
|
||||
Binary file not shown.
@ -249,7 +249,7 @@ private:
|
||||
} else if (proto_ == "hls") {
|
||||
std::string seg = std::to_string(std::max(1, cfg_.segment_sec));
|
||||
av_dict_set(&opts, "hls_time", seg.c_str(), 0);
|
||||
av_dict_set(&opts, "hls_list_size", "0", 0);
|
||||
av_dict_set(&opts, "hls_list_size", "15", 0);
|
||||
av_dict_set(&opts, "hls_flags", "delete_segments+append_list", 0);
|
||||
std::filesystem::create_directories(std::filesystem::path(url_).parent_path());
|
||||
}
|
||||
|
||||
90
scripts/check_model.py
Normal file
90
scripts/check_model.py
Normal file
@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""检查 RKNN 模型结构和输出信息"""
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
from rknn.api import RKNN
|
||||
except ImportError:
|
||||
print("错误: 未安装 rknn-toolkit2")
|
||||
sys.exit(1)
|
||||
|
||||
def check_model(model_path):
|
||||
rknn = RKNN(verbose=False)
|
||||
|
||||
# 加载模型
|
||||
ret = rknn.load_rknn(model_path)
|
||||
if ret != 0:
|
||||
print(f"加载模型失败: {model_path}")
|
||||
return
|
||||
|
||||
# 获取模型信息
|
||||
print(f"\n=== 模型信息: {model_path} ===\n")
|
||||
|
||||
# 使用 rknn_query 获取信息
|
||||
try:
|
||||
# 获取输入输出数量
|
||||
from ctypes import c_void_p, sizeof, Structure, c_int, c_uint32
|
||||
|
||||
class rknn_input_output_num(Structure):
|
||||
_fields_ = [("n_input", c_uint32), ("n_output", c_uint32)]
|
||||
|
||||
io_num = rknn_input_output_num()
|
||||
# 尝试获取输入输出数量
|
||||
print(f"尝试分析模型结构...")
|
||||
|
||||
except Exception as e:
|
||||
print(f"查询失败: {e}")
|
||||
|
||||
# 尝试用推理方式测试
|
||||
print("\n尝试模拟推理查看输出形状...")
|
||||
import numpy as np
|
||||
|
||||
# 创建假输入
|
||||
dummy_input = np.zeros((1, 768, 768, 3), dtype=np.uint8)
|
||||
|
||||
ret = rknn.init_runtime(core_mask=RKNN.NPU_CORE_AUTO)
|
||||
if ret != 0:
|
||||
print("初始化 runtime 失败")
|
||||
rknn.release()
|
||||
return
|
||||
|
||||
# 推理
|
||||
outputs = rknn.inference(inputs=[dummy_input], data_format=['nhwc'])
|
||||
|
||||
print(f"\n输出数量: {len(outputs)}")
|
||||
for i, out in enumerate(outputs):
|
||||
print(f" 输出[{i}]: shape={out.shape}, dtype={out.dtype}")
|
||||
# 显示部分数据
|
||||
flat = out.flatten()
|
||||
print(f" 数据范围: [{flat.min():.4f}, {flat.max():.4f}]")
|
||||
print(f" 前10个值: {flat[:10]}")
|
||||
|
||||
# 判断模型类型
|
||||
print(f"\n=== 分析结果 ===")
|
||||
if len(outputs) == 1:
|
||||
shape = outputs[0].shape
|
||||
print(f"模型类型: YOLOv8 (单输出)")
|
||||
print(f"输出形状: {shape}")
|
||||
if len(shape) == 3:
|
||||
# YOLOv8 输出通常是 [1, 84, 8400] 或 [1, 15, 8400] 等
|
||||
num_classes = shape[1] - 4 # 减去 x,y,w,h
|
||||
num_boxes = shape[2]
|
||||
print(f"检测框数量: {num_boxes}")
|
||||
print(f"类别数: {num_classes}")
|
||||
elif len(outputs) == 3:
|
||||
print(f"模型类型: YOLOv5 (三输出)")
|
||||
for i, out in enumerate(outputs):
|
||||
print(f" 输出[{i}] shape={out.shape}")
|
||||
else:
|
||||
print(f"模型类型: 其他 ({len(outputs)} 个输出)")
|
||||
|
||||
rknn.release()
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
print("用法: python3 check_model.py <model_path>")
|
||||
print("示例: python3 check_model.py models/best-768.rknn")
|
||||
sys.exit(1)
|
||||
|
||||
check_model(sys.argv[1])
|
||||
94
scripts/test_pt_webcam.py
Normal file
94
scripts/test_pt_webcam.py
Normal file
@ -0,0 +1,94 @@
|
||||
import argparse
|
||||
import time
|
||||
|
||||
import cv2
|
||||
from ultralytics import YOLO
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="Test YOLO .pt model with USB camera")
|
||||
parser.add_argument("--model", type=str, required=True, help="Path to .pt model")
|
||||
parser.add_argument("--camera", type=int, default=0, help="USB camera index")
|
||||
parser.add_argument("--imgsz", type=int, default=768, help="Inference image size")
|
||||
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
|
||||
parser.add_argument("--iou", type=float, default=0.6, help="NMS IoU threshold")
|
||||
parser.add_argument(
|
||||
"--classes",
|
||||
type=str,
|
||||
default="",
|
||||
help="Optional class ids, e.g. '3,6' for boots+Person. Empty means all classes.",
|
||||
)
|
||||
parser.add_argument("--device", type=str, default="0", help="CUDA device id, e.g. 0, or 'cpu'")
|
||||
parser.add_argument("--line-width", type=int, default=2, help="Box line width")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def parse_classes(raw: str):
|
||||
raw = raw.strip()
|
||||
if not raw:
|
||||
return None
|
||||
ids = []
|
||||
for x in raw.split(","):
|
||||
x = x.strip()
|
||||
if x:
|
||||
ids.append(int(x))
|
||||
return ids if ids else None
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
classes = parse_classes(args.classes)
|
||||
|
||||
model = YOLO(args.model)
|
||||
|
||||
cap = cv2.VideoCapture(args.camera)
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError(f"Cannot open camera index {args.camera}")
|
||||
|
||||
prev_t = time.time()
|
||||
|
||||
while True:
|
||||
ok, frame = cap.read()
|
||||
if not ok:
|
||||
print("Failed to read frame from camera")
|
||||
break
|
||||
|
||||
results = model.predict(
|
||||
source=frame,
|
||||
imgsz=args.imgsz,
|
||||
conf=args.conf,
|
||||
iou=args.iou,
|
||||
classes=classes,
|
||||
device=args.device,
|
||||
verbose=False,
|
||||
)
|
||||
|
||||
plotted = results[0].plot(line_width=args.line_width)
|
||||
|
||||
now = time.time()
|
||||
fps = 1.0 / max(now - prev_t, 1e-6)
|
||||
prev_t = now
|
||||
|
||||
cls_info = "all" if classes is None else str(classes)
|
||||
cv2.putText(
|
||||
plotted,
|
||||
f"FPS: {fps:.1f} | conf={args.conf:.2f} iou={args.iou:.2f} classes={cls_info}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
cv2.imshow("YOLO PT Webcam Test", plotted)
|
||||
key = cv2.waitKey(1) & 0xFF
|
||||
if key in (27, ord("q")):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -207,17 +207,17 @@ ModelHandle AiScheduler::LoadModel(const std::string& model_path, std::string& e
|
||||
for (uint32_t j = 0; j < ctx->n_output; ++j) {
|
||||
ctx->output_attrs[j].index = j;
|
||||
rknn_query(ctx->ctx, RKNN_QUERY_OUTPUT_ATTR, &ctx->output_attrs[j], sizeof(rknn_tensor_attr));
|
||||
LogInfo("[ai_scheduler] output[" + std::to_string(j) + "] type=" +
|
||||
std::to_string(ctx->output_attrs[j].type) + " qnt_type=" +
|
||||
std::to_string(ctx->output_attrs[j].qnt_type) + " zp=" +
|
||||
std::to_string(ctx->output_attrs[j].zp) + " scale=" +
|
||||
std::to_string(ctx->output_attrs[j].scale));
|
||||
}
|
||||
|
||||
ctx->output_buffers.resize(ctx->n_output);
|
||||
for (uint32_t j = 0; j < ctx->n_output; ++j) {
|
||||
uint32_t out_sz = ctx->output_attrs[j].size;
|
||||
if (out_sz == 0 && ctx->output_attrs[j].size_with_stride > 0) {
|
||||
out_sz = ctx->output_attrs[j].size_with_stride;
|
||||
}
|
||||
if (out_sz == 0 && ctx->output_attrs[j].n_elems > 0) {
|
||||
out_sz = ctx->output_attrs[j].n_elems * TensorTypeSizeBytes(ctx->output_attrs[j].type);
|
||||
}
|
||||
// FP32 output when want_float=1: 4 bytes per element
|
||||
uint32_t out_sz = ctx->output_attrs[j].n_elems * sizeof(float);
|
||||
if (out_sz > 0) {
|
||||
ctx->output_buffers[j].resize(out_sz);
|
||||
} else {
|
||||
@ -446,7 +446,7 @@ InferResult AiScheduler::Infer(ModelHandle handle, const InferInput& input) {
|
||||
std::vector<rknn_output> outputs(ctx->n_output);
|
||||
memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
|
||||
for (uint32_t i = 0; i < ctx->n_output; ++i) {
|
||||
outputs[i].want_float = 0; // Keep quantized output
|
||||
outputs[i].want_float = 0; // Keep INT8 quantized output, manual dequantize
|
||||
}
|
||||
|
||||
ret = rknn_outputs_get(ctx->ctx, ctx->n_output, outputs.data(), nullptr);
|
||||
@ -617,7 +617,7 @@ AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle,
|
||||
std::vector<rknn_output> outputs(ctx->n_output);
|
||||
memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
|
||||
for (uint32_t i = 0; i < ctx->n_output; ++i) {
|
||||
outputs[i].want_float = 0;
|
||||
outputs[i].want_float = 1; // Request FP32 output for direct use
|
||||
outputs[i].index = i;
|
||||
if (i < ctx->output_buffers.size() && !ctx->output_buffers[i].empty()) {
|
||||
outputs[i].is_prealloc = 1;
|
||||
@ -643,7 +643,8 @@ AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle,
|
||||
out.index = static_cast<int>(i);
|
||||
out.size = outputs[i].size;
|
||||
out.data = reinterpret_cast<const uint8_t*>(outputs[i].buf);
|
||||
out.type = ctx->output_attrs[i].type;
|
||||
// When want_float=1, RKNN outputs FP32
|
||||
out.type = RKNN_TENSOR_FLOAT32;
|
||||
out.zp = ctx->output_attrs[i].zp;
|
||||
out.scale = ctx->output_attrs[i].scale;
|
||||
out.dims.resize(ctx->output_attrs[i].n_dims);
|
||||
|
||||
BIN
transform/best-rk3588.rknn
Normal file
BIN
transform/best-rk3588.rknn
Normal file
Binary file not shown.
BIN
transform/best.onnx
Normal file
BIN
transform/best.onnx
Normal file
Binary file not shown.
BIN
transform/calib/img_0.npy
Normal file
BIN
transform/calib/img_0.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_1.npy
Normal file
BIN
transform/calib/img_1.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_2.npy
Normal file
BIN
transform/calib/img_2.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_3.npy
Normal file
BIN
transform/calib/img_3.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_4.npy
Normal file
BIN
transform/calib/img_4.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_5.npy
Normal file
BIN
transform/calib/img_5.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_6.npy
Normal file
BIN
transform/calib/img_6.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_7.npy
Normal file
BIN
transform/calib/img_7.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_8.npy
Normal file
BIN
transform/calib/img_8.npy
Normal file
Binary file not shown.
BIN
transform/calib/img_9.npy
Normal file
BIN
transform/calib/img_9.npy
Normal file
Binary file not shown.
10
transform/dataset.txt
Normal file
10
transform/dataset.txt
Normal file
@ -0,0 +1,10 @@
|
||||
models/calib/img_0.npy
|
||||
models/calib/img_1.npy
|
||||
models/calib/img_2.npy
|
||||
models/calib/img_3.npy
|
||||
models/calib/img_4.npy
|
||||
models/calib/img_5.npy
|
||||
models/calib/img_6.npy
|
||||
models/calib/img_7.npy
|
||||
models/calib/img_8.npy
|
||||
models/calib/img_9.npy
|
||||
10
transform/dataset_abs.txt
Normal file
10
transform/dataset_abs.txt
Normal file
@ -0,0 +1,10 @@
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_0.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_1.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_2.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_3.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_4.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_5.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_6.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_7.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_8.npy
|
||||
/home/orangepi/apps/OrangePi3588Media/models/calib/img_9.npy
|
||||
Loading…
Reference in New Issue
Block a user