图片去黑边(只考虑了去水平方向上的黑边)的核心算法是要找到图片顶部或底部的黑边位置,即两个纵坐标值, 主要用到了canny边缘计算、
houghlines直线检测、easyocr识别等算法。
给图片去黑边的实现逻辑为:
-
先进行canny边缘计算,再进行houghlines直线检测,取出图片的水平边缘;如果没有找到水平边缘,那么不做处理
-
对目标水平边缘进行过滤和分类
过滤逻辑是: 一侧为黑色,另一侧非黑色
分类逻辑是:
上边是黑色,下边是非黑色的,且位于图片水平中线以上,作为候选上边缘;
上边是非黑色,下边是黑色的,且位于图片水平中线以下,作为候选下边缘
-
对候选的上下边缘从外向内逐一校验,校验标准是:边缘之外不应存在文字(因为图片上的文字对于图片也是有意义的),也不应存在高度超过一定阈值的元素,从而得出符合条件且最靠内侧的上下边缘
如果找不到符合条件的上边缘,那么上边缘就是0
如果找不到符合条件的下边缘,那么下边缘就是图片高度-1
-
根据找出的上下边缘对原图进行裁剪
import os

import cv2
import numpy as np
import easyocr


def isPixelBlack(pixel):
    """Return whether the first three channels of *pixel* are all <= 10."""
    return pixel[0] <= 10 and pixel[1] <= 10 and pixel[2] <= 10


def checkLineIsBlack(img, width, y):
    """Return True when every pixel of row *y* in columns [0, width) is black.

    Args:
        img: Image indexed as ``img[row, column]``; each pixel must expose at
            least three channels (see ``isPixelBlack``).
        width: Number of columns to inspect.
        y: Row index to inspect.

    Returns:
        False as soon as one non-black pixel is found, True otherwise.
    """
    midX = int((width - 1) / 2)
    # Scan outwards from the centre column so rows with content in the middle
    # are rejected quickly. The offset runs up to ``width - 1 - midX`` so the
    # right-most column is also covered when the width is even (the previous
    # bound of ``midX`` skipped it).
    for offset in range(width - midX):
        left = midX - offset
        if left >= 0 and not isPixelBlack(img[y, left]):
            return False
        # offset 0 is the centre pixel itself, already checked via ``left``.
        if offset and not isPixelBlack(img[y, midX + offset]):
            return False
    return True


def computeBlackPixelNum(img, fromY, toY, x):
    """Count the black pixels of column *x* over the rows [fromY, toY)."""
    return sum(1 for y in range(fromY, toY) if isPixelBlack(img[y, x]))


# Ignore edges close to the top or bottom border; also ignore edges near the
# horizontal midline.
def isLevelLineNeedIgnore(height, y):
    """Return True when a horizontal line at row *y* should be discarded.

    A line is discarded when it lies within 50 rows of the top or bottom of
    the image, or inside the central band spanning 40%-60% of the height.

    Args:
        height: Image height in rows.
        y: Row index of the horizontal line.
    """
    # Too close to the top or bottom border.
    if y <= 50 or height - 1 - y <= 50:
        return True
    # Inside the central band between 0.4 * height and 0.6 * height (inclusive).
    midZoneStart = int(0.4 * height)
    midZoneEnd = int(0.6 * height)
    return bool(midZoneStart <= y <= midZoneEnd)


# Use one tenth of the image width as the minimum line-segment length.
def getMinLineLength(width):
    """Return the minimum line-segment length: one tenth of *width*."""
    return int(width / 10)


def computeValidFlag(valid_flag_list, left, right):
    """Count the rows in [left, right) whose flag is greater than zero.

    Counts of five or fewer are treated as noise and reported as 0.

    Args:
        valid_flag_list: Per-row flags; a value > 0 marks a flagged row.
        left: First row index (inclusive).
        right: Last row index (exclusive).

    Returns:
        The number of flagged rows, or 0 when that number is <= 5.
    """
    # Clamp to the list bounds: a negative start would otherwise wrap around
    # to the end of the list, and an end past the list would raise IndexError.
    start = max(left, 0)
    stop = min(right, len(valid_flag_list))
    flagged = sum(1 for flag in valid_flag_list[start:stop] if flag > 0)
    return flagged if flagged > 5 else 0


# Classify a horizontal line: 0 = invalid, 1 = candidate top edge,
# 2 = candidate bottom edge, 3 = candidate edge (no content on either side).
def checkEdgeType(valid_flag_list, y, height, init):
    """Classify the horizontal line at row *y* by the content around it.

    Two windows of up to *init* rows are inspected, one ending 10 rows above
    the line and one starting 10 rows below it. A window "has content" when
    ``computeValidFlag`` returns a positive count for it.

    Args:
        valid_flag_list: Per-row flags; a value > 0 marks a flagged row.
        y: Row index of the horizontal line.
        height: Image height in rows.
        init: Window size in rows.

    Returns:
        0 - invalid (content on both sides, or content on the wrong side for
            the half of the image the line is in);
        1 - candidate top edge (no content above, content below, above the
            horizontal midline);
        2 - candidate bottom edge (content above, no content below, below the
            horizontal midline);
        3 - candidate edge with no content on either side.
    """
    midY = int(height / 2)
    aboveFlag = computeValidFlag(valid_flag_list, max(0, y - 10 - init), y - 10)
    belowFlag = computeValidFlag(valid_flag_list, y + 10, min(y + 10 + init, height - 1))
    hasAbove = aboveFlag > 0
    hasBelow = belowFlag > 0

    if hasAbove and hasBelow:
        return 0
    if not hasAbove and not hasBelow:
        return 3
    # Exactly one side has content from here on.
    if hasBelow and y < midY:
        return 1
    if hasAbove and y > midY:
        return 2
    return 0


# Pick the most suitable top edge.
def pickOutFinalTopY(img, height, width, valid_topY_array, valid_flag_list, reader):
    """Choose the final top crop row from the candidate top edges.

    Candidates with at most 20 flagged rows above them are accepted outright
    ("matched"); the innermost (largest) of them is the starting point. The
    remaining candidates are then walked from the outside inwards and each is
    accepted only while the strip above it has at most 100 flagged rows, at
    most 100 black pixels in the centre column, and no text found by OCR.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows (unused, kept for signature symmetry).
        width: Image width in columns.
        valid_topY_array: Candidate top-edge rows; not modified.
        valid_flag_list: Per-row flags; a value > 0 marks a flagged row.
        reader: Object exposing ``readtext(image)`` returning a sized result.

    Returns:
        The selected top row, or 0 when no candidate qualifies.
    """
    # Sorted copy without duplicates: the caller's list is left untouched and
    # the same strip is never sent to OCR twice.
    candidates = sorted(set(valid_topY_array))
    matchedTopY = [
        candidateY
        for candidateY in candidates
        if computeValidFlag(valid_flag_list, 0, candidateY - 2) <= 20
    ]
    # Innermost matched edge. Previously the all-matched shortcut returned the
    # first candidate in input order, which made the result order-dependent.
    bestTopY = matchedTopY[-1] if matchedTopY else 0
    if len(matchedTopY) == len(candidates):
        return bestTopY

    midX = int(width / 2)
    for candidateY in candidates:
        if candidateY < bestTopY:
            continue
        if computeValidFlag(valid_flag_list, 0, candidateY) > 100:
            break
        if computeBlackPixelNum(img, 0, candidateY, midX) > 100:
            break
        # OCR the strip above the candidate; any text disqualifies it.
        roi = img[0:candidateY, 0:width]
        if len(reader.readtext(roi)) > 0:
            break
        bestTopY = candidateY
    return bestTopY


def pickOutFinalEndY(img, height, width, valid_endY_array, valid_flag_list, reader):
    """Choose the final bottom crop row from the candidate bottom edges.

    Mirror image of ``pickOutFinalTopY``: candidates with at most 20 flagged
    rows below them are accepted outright and the innermost (smallest) of
    them is the starting point; the remaining candidates are walked from the
    outside inwards and accepted only while the strip below them has at most
    100 flagged rows, at most 100 black pixels in the centre column, and no
    text found by OCR.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows.
        width: Image width in columns.
        valid_endY_array: Candidate bottom-edge rows; not modified.
        valid_flag_list: Per-row flags; a value > 0 marks a flagged row.
        reader: Object exposing ``readtext(image)`` returning a sized result.

    Returns:
        The selected bottom row, or ``height - 1`` when no candidate qualifies.
    """
    # Descending order = outermost first; duplicates removed, input untouched.
    candidates = sorted(set(valid_endY_array), reverse=True)
    matchedEndY = [
        candidateY
        for candidateY in candidates
        if computeValidFlag(valid_flag_list, candidateY + 2, height) <= 20
    ]
    # Innermost matched edge is the smallest row. The earlier code picked the
    # largest one here (and an arbitrary one in the all-matched shortcut),
    # which was inconsistent with the top-edge selection.
    bestEndY = matchedEndY[-1] if matchedEndY else height - 1
    if len(matchedEndY) == len(candidates):
        return bestEndY

    midX = int(width / 2)
    for candidateY in candidates:
        if candidateY > bestEndY:
            continue
        if computeValidFlag(valid_flag_list, candidateY, height) > 100:
            break
        if computeBlackPixelNum(img, candidateY, height, midX) > 100:
            break
        # OCR the strip below the candidate; any text disqualifies it.
        roi = img[candidateY:height, 0:width]
        if len(reader.readtext(roi)) > 0:
            break
        bestEndY = candidateY
    return bestEndY


def computeTopAndEnd(img, height, width, valid_flag_list, level_lines, reader):
    """Compute the top and bottom rows that bound the non-border content.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows.
        width: Image width in columns.
        valid_flag_list: Per-row flags; a value > 0 marks a flagged row.
        level_lines: Horizontal lines, each shaped like ``[[x1, y, x2, y2]]``.
        reader: OCR reader forwarded to the edge-selection helpers.

    Returns:
        ``(topY, endY)``; ``(0, height - 1)`` when no usable edge exists.
    """
    # 1. Keep only lines that classify as an edge and split them by side.
    valid_topY_array = []
    valid_endY_array = []
    midY = int(height / 2)
    for level_line in level_lines:
        y = level_line[0][1]
        edgeType = checkEdgeType(valid_flag_list, y, height, 50)
        # Type 3 has no content on either side, so its side is decided by
        # which half of the image it lies in.
        if edgeType == 1 or (edgeType == 3 and y < midY):
            valid_topY_array.append(y)
        elif edgeType == 2 or (edgeType == 3 and y > midY):
            valid_endY_array.append(y)

    # 2. Validate the candidates on each side (this step may run OCR).
    finalTopY = 0
    finalEndY = height - 1
    if valid_topY_array:
        finalTopY = pickOutFinalTopY(img, height, width, valid_topY_array, valid_flag_list, reader)
    if valid_endY_array:
        finalEndY = pickOutFinalEndY(img, height, width, valid_endY_array, valid_flag_list, reader)

    # 3. Return the rows of the top and bottom border edges.
    return finalTopY, finalEndY


# For rows where no edge was detected, re-check whether the row contains any
# non-black pixel.
def recomputeValidFlagList(img, height, width, valid_flag_list):
    """Flag every still-unflagged row that is not entirely black.

    Mutates *valid_flag_list* in place: entries equal to 0 are set to 1 when
    ``checkLineIsBlack`` reports the row as not black.
    """
    for y in range(height):
        if valid_flag_list[y] == 0 and not checkLineIsBlack(img, width, y):
            valid_flag_list[y] = 1


def _readImage(imagePath):
    """Load an image with OpenCV, raising OSError when it cannot be read."""
    img = cv2.imread(imagePath)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f"cannot read image: {imagePath}")
    return img


def _writeImage(targetPath, img):
    """Write an image with OpenCV, creating the target directory if needed."""
    parentDir = os.path.dirname(targetPath)
    if parentDir:
        os.makedirs(parentDir, exist_ok=True)
    # cv2.imwrite signals failure by returning False instead of raising.
    if not cv2.imwrite(targetPath, img):
        raise OSError(f"cannot write image: {targetPath}")


def recognizeImageValidZone(imagePath, reader, img=None):
    """Locate the top and bottom rows of the image content (without borders).

    Args:
        imagePath: Path of the image; also used in the diagnostic messages.
        reader: OCR reader forwarded to ``computeTopAndEnd``.
        img: Optional already-decoded image for *imagePath*; when omitted the
            file is read here.

    Returns:
        ``(startY, endY)``; ``(0, height - 1)`` when no usable horizontal line
        is found.

    Raises:
        OSError: If the image has to be read and cannot be.
    """
    if img is None:
        img = _readImage(imagePath)
    height, width = img.shape[:2]

    edges = cv2.Canny(img, 100, 200)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100,
                            minLineLength=getMinLineLength(width), maxLineGap=10)
    if lines is None:
        print(imagePath + "不存在直线")
        return 0, height - 1

    # Keep exactly-horizontal segments that are neither near the top/bottom
    # border nor in the central band.
    levelLines = []
    for line in lines:
        _, y1, _, y2 = line[0]
        if y1 == y2 and not isLevelLineNeedIgnore(height, y1):
            levelLines.append(line)
    if not levelLines:
        print(imagePath + "-----不存在水平直线")
        return 0, height - 1

    # Per-row flag: 1 when the row holds at least one edge pixel, else 0.
    valid_flag_list = edges.any(axis=1).astype(int).tolist()
    # Rows without edge pixels are still flagged when they are not all black.
    recomputeValidFlagList(img, height, width, valid_flag_list)
    return computeTopAndEnd(img, height, width, valid_flag_list, levelLines, reader)


def doDropForImage(srcDir, srcFile, targetDir, reader):
    """Crop the detected top/bottom borders off an image and save the result.

    The output is written to ``targetDir + srcFile + "_dealed.jpg"``.

    Raises:
        OSError: If the source cannot be read or the result cannot be written.
    """
    srcPath = srcDir + srcFile
    img = _readImage(srcPath)
    width = img.shape[1]
    # Rows bounding the content, both inclusive.
    startY, overY = recognizeImageValidZone(srcPath, reader, img)
    crop_img = img[startY:overY + 1, 0:width]
    _writeImage(targetDir + srcFile + "_dealed.jpg", crop_img)


def preDropForImage(srcDir, srcFile, targetDir, reader):
    """Save a preview of the image with the detected crop rows drawn in green.

    Images for which no border was detected go to ``targetDir + 'unchanged/'``
    (with a ``_dealed.jpg`` suffix); all others go to ``targetDir + 'changed/'``.

    Raises:
        OSError: If the source cannot be read or the preview cannot be written.
    """
    srcPath = srcDir + srcFile
    img = _readImage(srcPath)
    height, width = img.shape[:2]
    startY, overY = recognizeImageValidZone(srcPath, reader, img)

    # Mark only the edges that differ from the image border.
    if startY != 0:
        cv2.line(img, (0, startY), (width - 1, startY), (0, 255, 0), 2)
    if overY != height - 1:
        cv2.line(img, (0, overY), (width - 1, overY), (0, 255, 0), 2)

    if startY == 0 and overY == height - 1:
        _writeImage(targetDir + 'unchanged/' + srcFile + "_dealed.jpg", img)
    else:
        _writeImage(targetDir + 'changed/' + srcFile, img)


# OCR reader for the 'ch_sim' and 'en' languages with the GPU disabled.
reader = easyocr.Reader(['ch_sim', 'en'], gpu=False)
# Script entry: write a border-detection preview for "1.jpg" from the sample
# directory into the output directory, using the module-level OCR reader.
preDropForImage('E:/black/sample_images_black/', "1.jpg", 'E:/black/success_dealed/', reader)