diff --git a/cmd/managerd/main.go b/cmd/managerd/main.go index 01c6c95..91473c4 100644 --- a/cmd/managerd/main.go +++ b/cmd/managerd/main.go @@ -67,6 +67,7 @@ func main() { r.Route("/api", func(r chi.Router) { r.Post("/discovery/search", h.Search) r.Get("/devices", h.ListDevices) + r.Post("/devices", h.CreateDevice) r.Get("/devices/{id}", h.GetDevice) // Proxy routes for device actions diff --git a/docs/cloned_device_id_conflict.md b/docs/cloned_device_id_conflict.md new file mode 100644 index 0000000..51d4005 --- /dev/null +++ b/docs/cloned_device_id_conflict.md @@ -0,0 +1,160 @@ +# 克隆设备 device_id 冲突问题及解决方案 + +## 问题描述 + +部署多台 RK3588 设备时,如果它们是使用同一个镜像克隆的,可能会出现以下问题: + +- UDP 发现只能找到部分设备(如 7 台中只能发现 2 台) +- 设备列表中某些设备时隐时现 +- 下发配置时提示设备离线,但实际服务运行正常 + +## 原因分析 + +管理端通过 UDP 广播发现设备时,使用 `device_id` 作为唯一标识存储设备信息。 + +```go +// 发现服务使用 device_id 作为 map 的 key +found := make(map[string]*models.Device) +... +found[dev.DeviceID] = dev // 相同的 device_id 会覆盖之前的设备 +``` + +RK3588 Agent 的 `device_id` 生成优先级: +1. `/etc/machine-id` 文件内容 +2. MAC 地址或序列号 +3. 自动生成并落盘 + +**克隆设备的 `/etc/machine-id` 完全相同**,导致所有克隆设备返回相同的 `device_id`,在发现过程中互相覆盖,最终只能看到一台。 + +## 解决方案 + +### 方案一:修复克隆设备(推荐,根治方案) + +**⚠️ 注意:必须同时删除 dbus 的 machine-id,否则可能生成相同的 ID** + +`systemd-machine-id-setup` 会优先复制 `/var/lib/dbus/machine-id`,克隆镜像中这个文件也是相同的,导致生成的 ID 仍然冲突。 + +**正确的修复步骤:** + +```bash +# 1. 同时删除两个 machine-id 文件 +sudo rm -f /etc/machine-id +sudo rm -f /var/lib/dbus/machine-id + +# 2. 重启(系统会自动生成新的随机 ID) +sudo reboot + +# 3. 验证新生成的 ID +# 在每台设备上执行,确认输出不同 +cat /etc/machine-id +``` + +**错误的做法(可能生成相同 ID):** + +```bash +# ❌ 不要只删除 /etc/machine-id +sudo rm -f /etc/machine-id +sudo systemd-machine-id-setup # 会复制 dbus 的相同 ID! 
+``` + +**替代方案:手动生成随机 ID** + +```bash +# 删除旧的 +sudo rm -f /etc/machine-id /var/lib/dbus/machine-id + +# 手动生成新的随机 ID(每台设备分别执行) +uuidgen | tr -d '-' | sudo tee /etc/machine-id + +# 重启 +sudo reboot +``` + +重启后,执行以下命令验证新的 device_id: + +```bash +cat /etc/machine-id +``` + +然后重新在管理端点击"UDP 搜索",所有设备应该都能被发现。 + +### 方案二:手动添加设备(临时方案) + +如果暂时无法登录设备修改 machine-id,可以通过管理端的"手动添加设备"功能: + +1. 打开管理端 Web 界面 +2. 进入设备页面 +3. 点击"手动添加设备"按钮 +4. 填写以下信息: + +| 字段 | 必填 | 说明 | +|------|------|------| +| 设备 ID | 是 | 必须唯一,建议从设备执行 `cat /etc/machine-id` 获取,或使用 `uuidgen` 生成 | +| 设备名称 | 否 | 便于识别的名称,如 cam-02 | +| IP 地址 | 是 | 设备的实际 IP 地址 | +| Agent 端口 | 否 | 默认 9100 | +| Media 端口 | 否 | 默认 9000 | + +**注意**:手动添加的设备,如果 device_id 与设备实际报告的 device_id 不一致,UDP 发现时可能会产生重复条目。 + +## 获取 device_id 的方法 + +在 RK3588 设备上执行: + +```bash +# 方法1:查看 machine-id(device_id 的来源) +cat /etc/machine-id + +# 方法2:查看 agent 信息(包含 device_id) +curl http://127.0.0.1:9100/v1/info + +# 方法3:生成新的唯一 ID +uuidgen | tr -d '-' +``` + +## 预防措施 + +制作克隆镜像前,建议清空 machine-id: + +```bash +# 在制作镜像前执行 +sudo rm -f /etc/machine-id /var/lib/dbus/machine-id +sudo touch /etc/machine-id # 确保文件存在但为空 +``` + +这样设备首次启动时会自动生成新的 machine-id。 + +## 相关接口 + +管理端提供以下相关接口: + +### 手动添加设备(API) + +```bash +POST /api/devices +Content-Type: application/json + +{ + "device_id": "rk3588-xxx", + "device_name": "cam-01", + "ip": "10.0.0.10", + "agent_port": 9100, + "media_port": 9000 +} +``` + +### UDP 发现 + +```bash +POST /api/discovery/search +Content-Type: application/json + +{ + "timeout_ms": 1200 +} +``` + +## 参考 + +- [PRD_04_Manager_ConfigGUI_AgentAPI.md](../PRD_04_Manager_ConfigGUI_AgentAPI.md) - Agent API 文档 +- [API_Device_RemoteMgmt_InterfaceTable.md](../API_Device_RemoteMgmt_InterfaceTable.md) - 设备远程管理接口 diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 05f9bb3..91b4626 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -6,6 +6,7 @@ import ( "net/http" "net/url" "strings" + "time" "3588AdminBackend/internal/models" "3588AdminBackend/internal/service" @@ 
-56,6 +57,71 @@ func (h *Handler) ListDevices(w http.ResponseWriter, r *http.Request) { }) }
+// CreateDevice registers a device described by a JSON request body (the
+// manual-add path). device_id and ip are required after trimming whitespace;
+// agent_port and media_port default to 9100 and 9000 when omitted or zero and
+// must otherwise lie in 1-65535. The device is stored as online with
+// LastSeenMs set to the current time. Replies 400 on invalid input and 503
+// when the handler has no registry.
+func (h *Handler) CreateDevice(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		DeviceID   string `json:"device_id"`
+		DeviceName string `json:"device_name"`
+		IP         string `json:"ip"`
+		AgentPort  int    `json:"agent_port"`
+		MediaPort  int    `json:"media_port"`
+	}
+	// Cap the body so a client cannot stream an unbounded payload at us.
+	r.Body = http.MaxBytesReader(w, r.Body, 1<<20)
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "invalid request body", http.StatusBadRequest)
+		return
+	}
+
+	// The web form handler trims its fields; do the same here so both entry
+	// points store the same device_id for the same input.
+	req.DeviceID = strings.TrimSpace(req.DeviceID)
+	req.DeviceName = strings.TrimSpace(req.DeviceName)
+	req.IP = strings.TrimSpace(req.IP)
+	if req.DeviceID == "" || req.IP == "" {
+		http.Error(w, "device_id and ip are required", http.StatusBadRequest)
+		return
+	}
+	if req.AgentPort == 0 {
+		req.AgentPort = 9100
+	}
+	if req.MediaPort == 0 {
+		req.MediaPort = 9000
+	}
+	for _, port := range []int{req.AgentPort, req.MediaPort} {
+		if port < 1 || port > 65535 {
+			http.Error(w, "agent_port and media_port must be between 1 and 65535", http.StatusBadRequest)
+			return
+		}
+	}
+	// The proxy handlers check h.registry for nil before using it; apply the
+	// same guard instead of calling through a nil registry.
+	if h.registry == nil {
+		http.Error(w, "device registry unavailable", http.StatusServiceUnavailable)
+		return
+	}
+
+	h.registry.UpdateDevice(&models.Device{
+		DeviceID:   req.DeviceID,
+		DeviceName: req.DeviceName,
+		IP:         req.IP,
+		AgentPort:  req.AgentPort,
+		MediaPort:  req.MediaPort,
+		Online:     true,
+		LastSeenMs: time.Now().UnixMilli(),
+	})
+
+	w.Header().Set("Content-Type", "application/json")
+	// The status line is already committed once encoding starts, so an
+	// encode failure cannot be reported back to the client.
+	_ = json.NewEncoder(w).Encode(map[string]any{"ok": true})
+}
+
 func (h *Handler) GetDevice(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") dev, ok := h.findDevice(id) @@ -84,7 +150,6 @@ func (h *Handler) ProxyAgent(w http.ResponseWriter, r *http.Request) { http.Error(w, "device not found", http.StatusNotFound) return } - var agentPath string method := r.Method @@ -134,6 +199,11 @@ func (h *Handler) ProxyAgent(w http.ResponseWriter, r *http.Request) { return } + // Update online status on successful response + if code < 500 && h.registry != nil { + h.registry.TouchDevice(id) + } + w.WriteHeader(code) w.Write(resp) } @@ -146,7 +216,6 @@ func (h *Handler) ProxyAgentV1(w http.ResponseWriter, r *http.Request) { http.Error(w, "device not found", http.StatusNotFound) return } - prefix := fmt.Sprintf("/api/devices/%s", id) agentPath := strings.TrimPrefix(r.URL.Path, prefix) if !strings.HasPrefix(agentPath, "/v1/") && agentPath != "/v1" { @@ -164,6 +233,11 @@ func (h *Handler) 
ProxyAgentV1(w http.ResponseWriter, r *http.Request) { return } + // Update online status on successful response + if code < 500 && h.registry != nil { + h.registry.TouchDevice(id) + } + w.WriteHeader(code) _, _ = w.Write(resp) } diff --git a/internal/api/openapi.go b/internal/api/openapi.go index b915197..faee927 100644 --- a/internal/api/openapi.go +++ b/internal/api/openapi.go @@ -53,6 +53,27 @@ func OpenAPI(w http.ResponseWriter, r *http.Request) { }, }, }, + "post": map[string]any{ + "requestBody": map[string]any{ + "required": true, + "content": map[string]any{ + "application/json": map[string]any{ + "schema": map[string]any{ + "type": "object", + "properties": map[string]any{ + "device_id": map[string]any{"type": "string"}, + "device_name": map[string]any{"type": "string"}, + "ip": map[string]any{"type": "string"}, + "agent_port": map[string]any{"type": "integer"}, + "media_port": map[string]any{"type": "integer"}, + }, + "required": []any{"device_id", "ip"}, + }, + }, + }, + }, + "responses": map[string]any{"200": map[string]any{"description": "ok"}, "400": map[string]any{"description": "malformed JSON body or invalid/missing fields"}}, + }, }, "/api/devices/{id}": map[string]any{ "get": map[string]any{ diff --git a/internal/service/registry.go b/internal/service/registry.go index 2e30782..936c99e 100644 --- a/internal/service/registry.go +++ b/internal/service/registry.go @@ -75,6 +75,18 @@ func (s *RegistryService) GetDevices() []*models.Device { return list }
+// TouchDevice marks a known device online and refreshes its LastSeenMs; IDs not
+// in the registry are ignored. The HTTP proxy handlers call it after a non-5xx reply.
+func (s *RegistryService) TouchDevice(deviceID string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if dev, ok := s.devices[deviceID]; ok {
+		dev.LastSeenMs = time.Now().UnixMilli()
+		dev.Online = true
+	}
+}
+
 func (s *RegistryService) startPruning() { ticker := time.NewTicker(2 * time.Second) for range ticker.C { diff --git a/internal/web/ui.go b/internal/web/ui.go index 8e3edb2..13a005c 100644 --- a/internal/web/ui.go +++ b/internal/web/ui.go @@ -121,6 +121,8 
@@ func (u *UI) Routes() (chi.Router, error) { }) r.Get("/devices", u.pageDevices) + r.Get("/devices-add", u.pageDeviceAdd) + r.Post("/devices-add", u.actionDeviceAdd) r.Post("/devices/batch-action", u.actionDevicesBatchAction) r.Post("/discovery/search", u.actionDiscoverySearch) r.Get("/devices/{id}", u.pageDevice) @@ -192,6 +194,66 @@ func (u *UI) pageDevices(w http.ResponseWriter, r *http.Request) { }) }
+// pageDeviceAdd renders the "device_add" template with only the page title set.
+func (u *UI) pageDeviceAdd(w http.ResponseWriter, r *http.Request) {
+	u.render(w, r, "device_add", PageData{Title: "添加设备"})
+}
+
+// parseDevicePort converts a port form field to an int. A blank field or 0
+// yields fallback; the bool is false when the field is not an integer or is
+// outside 0-65535.
+func parseDevicePort(raw string, fallback int) (int, bool) {
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return fallback, true
+	}
+	n, err := strconv.Atoi(raw)
+	if err != nil || n < 0 || n > 65535 {
+		return 0, false
+	}
+	if n == 0 {
+		return fallback, true
+	}
+	return n, true
+}
+
+// actionDeviceAdd handles the add-device form POST. device_id and ip are
+// required; agent_port and media_port default to 9100 and 9000. Invalid input
+// re-renders the form with an error message; on success the device is stored
+// as online and the browser is redirected to /ui/devices.
+func (u *UI) actionDeviceAdd(w http.ResponseWriter, r *http.Request) {
+	// Parse errors are ignored here, as actionDiscoverySearch does; a form
+	// that yields no device_id or ip is rejected just below.
+	_ = r.ParseForm()
+	deviceID := strings.TrimSpace(r.FormValue("device_id"))
+	deviceName := strings.TrimSpace(r.FormValue("device_name"))
+	ip := strings.TrimSpace(r.FormValue("ip"))
+	if deviceID == "" || ip == "" {
+		u.render(w, r, "device_add", PageData{Title: "添加设备", Error: "device_id 和 ip 不能为空"})
+		return
+	}
+
+	// Reject unparsable or out-of-range ports instead of silently
+	// substituting the default for them.
+	agentPort, agentOK := parseDevicePort(r.FormValue("agent_port"), 9100)
+	mediaPort, mediaOK := parseDevicePort(r.FormValue("media_port"), 9000)
+	if !agentOK || !mediaOK {
+		u.render(w, r, "device_add", PageData{Title: "添加设备", Error: "端口必须是 1-65535 之间的整数(留空使用默认值)"})
+		return
+	}
+
+	u.registry.UpdateDevice(&models.Device{
+		DeviceID:   deviceID,
+		DeviceName: deviceName,
+		IP:         ip,
+		AgentPort:  agentPort,
+		MediaPort:  mediaPort,
+		Online:     true,
+		LastSeenMs: time.Now().UnixMilli(),
+	})
+	http.Redirect(w, r, "/ui/devices", http.StatusFound)
+}
+
 func (u *UI) actionDiscoverySearch(w http.ResponseWriter, r *http.Request) { _ = r.ParseForm() timeoutMs, _ := strconv.Atoi(strings.TrimSpace(r.FormValue("timeout_ms"))) diff --git a/internal/web/ui/templates/device_add.html b/internal/web/ui/templates/device_add.html new file mode 100644 index 0000000..6d81d22 --- /dev/null +++ b/internal/web/ui/templates/device_add.html @@ -0,0 +1,49 @@ +{{define "device_add"}} +
sudo rm -f /etc/machine-id /var/lib/dbus/machine-id +sudo reboot+