# 图像处理智能化的探索[二]:文字区块识别

## 2. 探索

• 均值计算

`mean, std = cv2.meanStdDev(img)  `

• OCR

• 边缘检测

## 4. 实践

### 4.1 图像降噪

```python
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
sobel = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
```

`dst = cv2.Sobel(src, ddepth, dx, dy[, dst[, ksize[, scale[, delta[, borderType]]]]])  `

### 4.2 膨胀与腐蚀

```python
c1 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 8))
c2 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 6))

ret, bimg = cv2.threshold(sobel, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
dilation = cv2.dilate(bimg, c2, iterations=1)
```

```python
erosion = cv2.erode(dilation, c1, iterations=1)
img_edge = cv2.dilate(erosion, c2, iterations=1)
```

### 4.3 筛选文字区域

`contours, hierarchy = cv2.findContours(img_edge, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)  `

```python
# 记录文字区块的总面积（累加每个区块的 height * width）
area_text_num = 0
region = []

# 根据边缘连接得到所有轮廓
contours, hierarchy = cv2.findContours(img_edge, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

for i in range(len(contours)):
    cnt = contours[i]
    area = cv2.contourArea(cnt)

    # 筛掉面积过小的区块
    if area < 1000:
        continue

    # 得到最小矩形区域，转换为顶点坐标形式（矩形可能会有角度）
    # 注：cv2.cv.BoxPoints 是 OpenCV 2.x 的接口，OpenCV 3 及以上版本对应 cv2.boxPoints
    rect = cv2.minAreaRect(cnt)
    box = cv2.cv.BoxPoints(rect)
    box = np.asarray(box)
    box = box.astype(int)

    x0 = box[0][0] if box[0][0] > 0 else 0
    x1 = box[2][0] if box[2][0] > 0 else 0
    y0 = box[0][1] if box[0][1] > 0 else 0
    y1 = box[2][1] if box[2][1] > 0 else 0
    height = abs(y0 - y1)
    width = abs(x0 - x1)

    # 筛掉不够“扁”的区块，它们更有可能不是文字
    if height > width * 0.3:
        continue
    area_text_num += height * width
    region.append(box)

return region, area_text_num
```

### 4.4 边缘调整

```python
for i in range(len(contours)):
    cnt = contours[i]
    area = cv2.contourArea(cnt)

    # 筛掉面积过小的区块
    if area < 1000:
        continue

    # 得到最小矩形区域，转换为顶点坐标形式（矩形可能会有角度）
    # 注：cv2.cv.BoxPoints 是 OpenCV 2.x 的接口，OpenCV 3 及以上版本对应 cv2.boxPoints
    rect = cv2.minAreaRect(cnt)
    box = cv2.cv.BoxPoints(rect)
    box = np.asarray(box)
    box = box.astype(int)

    # 过滤掉过于模糊的区块
    lap = cv2.Laplacian(gray[box[1][1]:box[0][1], box[0][0]:box[3][0]], cv2.CV_64F)
    if lap is None or lap.var() < TEXT_LAPLACIAN_THRESHOLD:
        continue

    # Code...
```

## 5. 尾声

