YOLOv5+pyqt5+摄像头在特定条件下进行目标检测并采集原始数据

项目介绍

项目地址

GitHub - biabu0/Yolov5_D435i: 通过YOLOV5与pyqt5实现一个使用D435i深度摄像头采集特定需求与场景下的深度数据的小程序

通过YOLOV5对指定的区域进行检测，当检测到目标进入特定区域时，开始保存数据，摄像头采用D435i深度相机，用于采集深度数据集。

指定需要屏蔽的检测区域，即使目标进入该区域也无法进行有效的检测，应用于特定场景的检测。
只有目标在检测区域内，才进行数据的采集与保存，避免一直采集数据，目标离开检测区域则停止保存数据，避免在数据采集过程中存在大量的无效数据，节约数据清洗时间，节省磁盘容量。
按照时间存储数据。
使用pyqt5设计可视化界面，将UI界面与逻辑代码分离。

项目演示视频

演示视频

环境配置

按照requirements.txt文件配置yolov5环境，并安装pyqt5和pyrealsense2。

核心代码解析

detect_logical.py：负责加载模型，并初始化模型参数；选择遮蔽区域以及需要保存的数据文件地址；加载D435深度相机数据流，将数据送入检测，检测到特定目标返回数据保存的标志位进行数据存储。
main_logic.py:主界面，可以进行注册账号与登录账号。
ui/ori_ui：ui源文件，可以使用Qt Designer对UI界面进行修改，修改后使用**pyuic5 main.ui > ui_main.py**转换成py文件（注意最好使用绝对路径，不然可能出现问题）。
utlis/id_utlis.py与userInfo.csv：用于写入账户信息。

遮蔽区域选择

通过鼠标左键获取需要屏蔽的区域的四个角的位置，保存到一个全局变量中，用于后续检测的时候生成指定区域的掩码，从而屏蔽特定区域。

    def mouse_callback(self, event, x, y, flags, param):
        """Record a left click as one corner of the shielded region.

        Registered as the OpenCV mouse callback by ``select_mask``. The click
        is stored as a fraction of the frame width/height so that ``detect``
        can rescale it to the size of the image it receives.

        Args:
            event: OpenCV mouse event code; only left-button-down is handled.
            x: Click column in pixels.
            y: Click row in pixels.
            flags: OpenCV event flags (unused).
            param: User data supplied by OpenCV (unused).
        """
        if event != cv2.EVENT_LBUTTONDOWN:
            return
        # frame_shape is (height, width), set by select_mask before the
        # callback is registered.
        normalized_x = x / self.frame_shape[1]
        normalized_y = y / self.frame_shape[0]
        self.mouse_positions.append([normalized_x, normalized_y])

    def select_mask(self):
        """Let the user click the four corners of the region to shield.

        Starts the RealSense pipeline, grabs a single colour frame, shows it
        in an OpenCV window and collects left clicks through
        ``mouse_callback`` until the user presses 'q'. The clicks are kept in
        ``self.mouse_positions`` as ``[x_ratio, y_ratio]`` pairs.

        The pipeline is always stopped and the window always destroyed, even
        if grabbing or displaying the frame fails.
        """
        window_name = 'Camera Image'
        ok_button = QtWidgets.QMessageBox.Ok
        self.mouse_positions = []
        self.pipeline.start(self.config)
        try:
            frames = self.pipeline.wait_for_frames()
            img_color = frames.get_color_frame()
            # Test truthiness, as show_video_frame does: a missing stream is
            # reported as an empty frame object rather than None.
            if not img_color:
                print("Error: Could not open video device.")
                exit()
            img_color = np.asanyarray(img_color.get_data())
            self.frame_shape = img_color.shape[:2]
            cv2.namedWindow(window_name)
            try:
                cv2.setMouseCallback(window_name, self.mouse_callback)
                while True:
                    cv2.imshow(window_name, img_color)
                    # Block until a key is pressed; 'q' ends the selection.
                    if cv2.waitKey(0) & 0xFF == ord('q'):
                        break
            finally:
                cv2.destroyWindow(window_name)
        finally:
            # Release the D435i so the detection loop can start it again.
            self.pipeline.stop()
        print("mouse_positions", self.mouse_positions)
        # detect() indexes corners 0..3, so fewer than four clicks is not a
        # usable selection.
        if len(self.mouse_positions) < 4:
            QtWidgets.QMessageBox.warning(self, u"Warning", u"遮掩区域需要四个角点，请重新选择",
                                          buttons=ok_button, defaultButton=ok_button)
            return
        QtWidgets.QMessageBox.information(self, u"Notice", u"遮掩区域选择成功",
                                          buttons=ok_button, defaultButton=ok_button)

选择数据保存地址

直接将选择的路径保存到全局变量中，后续需要保存地址的时候加载进去。

    def open_file(self):
        """Ask for the dataset output directory and report the outcome.

        The dialog result is stored in ``self.openfile_name_dataset``. A
        non-empty result is confirmed with an information box; an empty one
        produces a warning box.
        """
        chosen_dir = QFileDialog.getExistingDirectory(self, '选择数据集目录')
        self.openfile_name_dataset = chosen_dir
        ok_button = QtWidgets.QMessageBox.Ok

        if chosen_dir:
            QtWidgets.QMessageBox.information(
                self,
                u"Notice",
                u"数据集路径为：" + str(chosen_dir),
                buttons=ok_button,
                defaultButton=ok_button,
            )
            return

        QtWidgets.QMessageBox.warning(
            self,
            u"Warning",
            u"打开文件地址失败",
            buttons=ok_button,
            defaultButton=ok_button,
        )

采集数据

当检测到目标存在时，需要进行数据保存，调用该函数。从D435i中获取帧作为参数。将深度帧与彩色帧对齐，获取深度图与彩色图。按照时间格式创建数据保存的文件夹，可以保存四种格式的数据：color：彩色图；depth：npy格式的原始深度图；depthjpg：jpg格式的深度图；depth_mapped_image：伪彩色可视化后的深度图。

    def save_dataset(self, frames):
        """Save one aligned colour/depth pair into a per-minute output folder.

        The depth frame is aligned to the colour stream, then four files are
        written under ``<dataset dir>/outfile/<YYYY_mm_dd_HH_MM>/``, all named
        after the current millisecond timestamp:

        * ``color/<ts>.jpg``: the colour image
        * ``depth/<ts>.npy``: the raw uint16 depth array
        * ``depthjpg/<ts>.jpg``: the depth image written as JPEG
        * ``depth_mapped_image/<ts>.jpg``: the JET colour-mapped depth image

        Args:
            frames: Frameset obtained from the RealSense pipeline.

        Returns:
            True when every image write reported success, False when a frame
            is missing or an image write failed, and None when no dataset
            directory has been chosen (a warning box is shown in that case).
        """
        # Check the destination first so no frame work is done for nothing.
        # getattr covers the case where open_file has never been called.
        dataset_root = getattr(self, "openfile_name_dataset", None)
        if not dataset_root:
            QtWidgets.QMessageBox.warning(self, u"Warning", u"请先选择数据集地址",
                                          buttons=QtWidgets.QMessageBox.Ok,
                                          defaultButton=QtWidgets.QMessageBox.Ok)
            return

        # Align depth to the colour stream.
        align = rs.align(rs.stream.color)
        aligned_frames = align.process(frames)
        aligned_depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        # An empty frame has no data to convert; skip this frameset.
        if not aligned_depth_frame or not color_frame:
            return False

        depth_image = np.asanyarray(aligned_depth_frame.get_data())
        depth_data = np.asanyarray(aligned_depth_frame.get_data(), dtype="uint16")
        color_image = np.asanyarray(color_frame.get_data())

        # One folder per minute; files inside are keyed by millisecond stamp.
        minute_stamp = time.strftime("%Y_%m_%d_%H_%M", time.localtime())
        save_path = os.path.join(dataset_root, "outfile", minute_stamp)
        for sub_dir in ("color", "depth", "depthjpg", "depth_mapped_image"):
            os.makedirs(os.path.join(save_path, sub_dir), exist_ok=True)

        saved_count = int(time.time() * 1000)  # millisecond timestamp
        image_name = "{}.jpg".format(saved_count)

        depth_mapped_image = cv2.applyColorMap(
            cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)

        # Colour image, stored as JPEG.
        color_ok = cv2.imwrite(os.path.join(save_path, "color", image_name), color_image)

        # Raw depth values as uint16; np.save appends the .npy suffix.
        np.save(os.path.join(save_path, "depth", "{}".format(saved_count)), depth_data)

        # JPEG copy of the depth image so capture errors can be spotted by eye.
        # NOTE(review): JPEG stores 8 bits per channel, so this copy is not a
        # faithful record of the 16-bit depth; the .npy file is the one to
        # use for measurements. Confirm the conversion OpenCV applies.
        depth_ok = cv2.imwrite(os.path.join(save_path, "depthjpg", image_name), depth_image)

        # Colour-mapped rendering of the depth image.
        mapped_ok = cv2.imwrite(
            os.path.join(save_path, "depth_mapped_image", image_name), depth_mapped_image)

        # cv2.imwrite returns a success flag instead of raising.
        return bool(color_ok and depth_ok and mapped_ok)

目标检测信息

根据选择掩码阶段选择的四个坐标位置生成mask应用到图像上，达到遮蔽区域检测的目的。实现mask后查看掩码具体位置，然后进入检测逻辑，返回检测信息以及数据保存位。

    def detect(self, name_list, img):
        """Black out the shielded region, run the model and flag a target.

        The first four entries of ``self.mouse_positions`` (``[x_ratio,
        y_ratio]`` pairs) describe a polygon that is filled with black before
        inference, so objects inside it are hidden from the model. When fewer
        than four corners are available no region is shielded and the whole
        frame is searched.

        Args:
            name_list: List that receives the class name of every detection.
            img: Image array indexed as (height, width, channel) in BGR order.
                The same array is handed to ``plot_one_box2`` together with
                each detection's box and label.

        Returns:
            Tuple ``(info_show_target, self.info_show_int)``. The flag is 0
            when at least one detection has class index 0 with confidence
            above 0.6, otherwise 1. The message is empty when the model
            returned no detections at all.
        """
        # Keep a handle on the caller's array: masking below rebinds `img`.
        showimg = img

        corners = getattr(self, "mouse_positions", None) or []
        if len(corners) >= 4:
            height, width = img.shape[0], img.shape[1]
            # Clicks are taken in the order 0, 1, 3, 2 to form the outline.
            pts = np.array(
                [[int(width * corners[k][0]), int(height * corners[k][1])]
                 for k in (0, 1, 3, 2)],
                np.int32)
            mask = np.zeros([height, width], dtype=np.uint8)
            cv2.fillPoly(mask, [pts], (255, 255, 255))
            # Invert: 0 inside the polygon, 255 everywhere else.
            mask = 255 - mask
            # Pixels where the mask is 0 become black (0, 0, 0).
            img = cv2.add(img, np.zeros(np.shape(img), dtype=np.uint8), mask=mask)

            if not self.border:
                # Shown once only: preview the masked image with its outline.
                # (255, 0, 0) is blue in BGR channel order.
                border_color = (255, 0, 0)
                border_thickness = 2
                cv2.polylines(img, [pts], True, border_color, border_thickness)
                self.border = True
                cv2.imshow('Image with Mask and Border', img)
                cv2.waitKey(0)  # blocks until a key is pressed
                cv2.destroyAllWindows()

        with torch.no_grad():
            img = letterbox(img, new_shape=self.opt.img_size)[0]
            # BGR to RGB, and HWC to CHW.
            img = img[:, :, ::-1].transpose(2, 0, 1)
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img).to(self.device)
            img = img.half() if self.half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)  # add the batch dimension

            # Inference
            pred = self.model(img, augment=self.opt.augment)[0]
            # Apply NMS
            pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres,
                                       classes=self.opt.classes,
                                       agnostic=self.opt.agnostic_nms)

            info_show = ""
            info_show_target = ""
            # 1 means "no target"; set to 0 below when a target is found.
            self.info_show_int = 1
            for det in pred:
                if det is not None and len(det):
                    # Column 4 is the confidence and column 5 the class index.
                    condition = (det[:, 5] == 0.0) & (det[:, 4] > 0.6)
                    if condition.any():
                        info_show_target = "有人员进入检测区域"
                        self.info_show_int = 0
                    else:
                        info_show_target = "无人员进入检测区域"
                        self.info_show_int = 1

                    # Rescale boxes from the letterboxed size to the frame size.
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], showimg.shape).round()
                    for *xyxy, conf, cls in reversed(det):
                        label = '%s %.2f' % (self.names[int(cls)], conf)
                        name_list.append(self.names[int(cls)])
                        single_info = plot_one_box2(xyxy, showimg, label=label,
                                                    color=self.colors[int(cls)],
                                                    line_thickness=2)
                        info_show = info_show + single_info + "\n"
        return info_show_target, self.info_show_int

视频帧操作逻辑

打开D435i，获取彩色图，要将彩色图copy一份再送入detect检测逻辑，不然会导致最后保存的数据含有检测的目标框。

   def show_video_frame(self):
       """Process one frameset: detect, save on a hit, and refresh the UI.

       Waits for the next frameset from the pipeline. Without a colour frame
       the detection session is ended through ``finish_detect``. Otherwise
       detection runs on a copy of the colour image, the frameset is handed
       to ``save_dataset`` when the detection flag is 0, and the status text
       and image are pushed to the UI widgets.
       """
       frames = self.pipeline.wait_for_frames()
       color_frame = frames.get_color_frame()
       if not color_frame:
           self.finish_detect()
           return

       # Detection works on a copy so that the data saved from `frames`
       # does not end up containing the drawn boxes.
       raw_bgr = np.asanyarray(color_frame.get_data())
       color_image_detect = raw_bgr.copy()
       status_text, save_flag = self.detect([], color_image_detect)

       if save_flag == 0:
           # Target present: store this frameset.
           if self.save_dataset(frames):
               status_text += " 数据保存成功"
       elif save_flag == 1:
           # No target: nothing is stored for this frameset.
           status_text += " 停止保存数据"

       # Show the detection message and the image.
       self.ui.textBrowser.setText(status_text)
       resized = cv2.resize(color_image_detect, (640, 480))
       # The RGB array is kept on self and its buffer is handed to QImage.
       self.result = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
       frame_height = self.result.shape[0]
       frame_width = self.result.shape[1]
       qt_image = QtGui.QImage(self.result.data, frame_width, frame_height,
                               QtGui.QImage.Format_RGB888)
       self.ui.label.setPixmap(QtGui.QPixmap.fromImage(qt_image))
       self.ui.label.setScaledContents(True)