A method for calculating the weighted average threshold as the criterion for opt

原创

Swing Dunn

发布于 2025-10-21 11:00:45

600

文章被收录于专栏：Some studies in imgs

Relying on the morphological method alone can make the final judged image differ from the original image, because it does not take the influence of filling details into account. Selecting the threshold by weighted averaging allows more factors to contribute to the result.

The selection of the weighted average threshold takes into account the differences in foreground and background color between filled and unfilled blocks, as well as the direct transition from one option to another.

1.By using the previous method, we have managed to determine the exact boundaries of the filled-in area as much as possible.

    ori_img = cv2.imread(img_path, cv2.IMREAD_ANYCOLOR)
    display_img = ori_img.copy()

    gray = cv2.cvtColor(ori_img,cv2.COLOR_RGB2GRAY)
    display_gray = gray.copy()

    valid_filling_ranges = target_areas(ori_img)  


    # 二值化处理，使用OTSU自动阈值
    g_threshold, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    #img_show(binary)
    #img_show(binary)

    for i in range(len(valid_filling_ranges)):
        lt_x = valid_filling_ranges[i][0]
        lt_y = valid_filling_ranges[i][1]
        rb_x = valid_filling_ranges[i][2]
        rb_y = valid_filling_ranges[i][3]
        cv2.rectangle(display_img,(lt_x,lt_y), (rb_x,rb_y), (0,0,255),2)

2.Perform region connectivity on the binary images within the target area (eliminating voids caused by uneven filling, etc.)

Contours are searched in the binary image of the target area. Every contour whose area is less than 4 is filled with black (gray level 0).

  contours, hierarchy = cv2.findContours(opt_rect_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # 遍历轮廓
        for i in range(len(contours)):
            area = cv2.contourArea(contours[i])
            if area < 4:
                cv2.drawContours(opt_rect_bin, [contours[i]], -1, (0), -1)
        #img_show(opt_rect_bin)

3.Count the total number of black pixels and the total number of white pixels in the binary image within the statistical area

Based on these counts, the proportion of black pixels within an option block is used as the reference for the foreground color, and the proportion of white pixels within the same block is used as the reference for the background color.

total_pixels_count = bin_opt_rect.width * bin_opt_rect.height

rate_foreground (rf) = count_black_pixels / total_pixels_count

rate_background (rb) = count_white_pixels / total_pixels_count

        result_arr = np.where(opt_rect_bin > 0, 1 , 0)
        count_white_pixels = np.sum(result_arr)
        count_black_pixels = result_arr.size - count_white_pixels
        
        rf_bg_rate = count_black_pixels / result_arr.size
        rb_bg_rate = count_white_pixels / result_arr.size

        print('黑色像素占比：', rf_bg_rate)
        print('白色像素占比：', rb_bg_rate)

gray_f_value = int(rf_bg_rate * 255)
        gray_b_value = int(rb_bg_rate * 255)

        rf_view = np.full_like(opt_rect_bin, gray_f_value)
        #img_show(rf_view)
        #display_gray[lt_y: rb_y, lt_x : rb_x] = rf_view      
        #cv2.putText(display_gray,str(gray_f_value),(lt_x,lt_y),font,0.3,(0,0,255),1,cv2.LINE_AA)
        
        rb_view = np.full_like(opt_rect_bin, gray_b_value)
        #img_show(rb_view)
        # display_gray[lt_y: rb_y, lt_x : rb_x] = rb_view
        # cv2.putText(display_gray,str(gray_b_value),(lt_x,lt_y),font,0.3,(0,0,255),1,cv2.LINE_AA)

Visualization result of foreground color reference value:

Dividing the displayed value by 255 gives the proportion of black (filled) pixels.

Visualization result of background color reference value:

Dividing the displayed value by 255 gives the proportion of white (unfilled) pixels.

4.Based on the results of the above statistics, the average pixel value within the target area was calculated through weighted computation.

Reference from the paper:

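5. Calculate the weighted average gray-level threshold of the target area according to the formula below, where $g$ is the gray level of a pixel, $g_f$ is `gray_f_value` and $g_b$ is `gray_b_value`:

$$
w(g) =
\begin{cases}
0, & g \ge g_f \\
1 - \dfrac{g - g_b}{g_f - g_b}, & g_b \le g < g_f \\
1, & g < g_b
\end{cases}
\qquad
gmw = \frac{\sum g \cdot w(g)}{\sum w(g)}
$$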

gwm_mask_arr = np.where(opt_rect_gray >=  gray_f_value, 0.0, np.where(opt_rect_gray >=gray_b_value, 1- (opt_rect_gray-gray_b_value) / (gray_f_value - gray_b_value),1))

gmw_pixel_arr = opt_rect_gray * gwm_mask_arr

gmw_pixel_value = int(np.sum(gmw_pixel_arr) / np.sum(gwm_mask_arr))

print('加权平均阈值：', gmw_pixel_value)

The visualized result of the threshold:

6. Calculate the proportion rwf of pixels whose values do not exceed the weighted average pixel value gmw.

gwm_arvage_view = np.full_like(opt_rect_bin, gmw_pixel_value)
        display_gray[lt_y: rb_y, lt_x : rb_x] = gwm_arvage_view  
        cv2.putText(display_gray,str(gmw_pixel_value),(lt_x,lt_y),font,0.3,(0,0,255),1,cv2.LINE_AA)    

        gwm_result_arr = np.where(opt_rect_gray <= gmw_pixel_value, 1 , 0)
        count_gwmite_pixels = np.sum(gwm_result_arr)
        gmw_rate = count_gwmite_pixels / opt_rect_gray.size
        
        print('低于加权阈值点像素占比：', gmw_rate)
        #print(gmw_rate)
        gwm_value =  gmw_rate * 255
        #print(gwm_value)
        # gwm_view = np.full_like(opt_rect_bin, gwm_value)
        # display_gray[lt_y: rb_y, lt_x : rb_x] = gwm_view

The visualization results of rwf:

Source code:

import cv2
import sys
import numpy as np
import matplotlib.pyplot as plt

img_path = './img7/3.jpg'
template_block_pos = [(30,11), (76,11), (124, 11), (172, 11),
                      (30,41), (76,41), (124, 41), (172, 41),
                      (30,68), (76,68), (124, 68), (172, 68),
                      (30,97), (76,97), (124, 97), (172, 97),
                      (30,126), (76,126), (124, 126), (172, 126)]
template_block_size =(28,17)

font = cv2.FONT_HERSHEY_SIMPLEX

def img_show(img):
    """Show *img* in the "default" window and block until a key is pressed.

    The window is created with ``cv2.WINDOW_FREERATIO`` and is destroyed
    again once ``cv2.waitKey(0)`` returns.
    """
    window_name = "default"
    cv2.namedWindow(window_name, cv2.WINDOW_FREERATIO)
    cv2.imshow(window_name, img)
    cv2.waitKey(0)  # 0 = wait without timeout
    cv2.destroyWindow(window_name)

def _darkest_window_start(img, length, axis):
    """Return the start index of the darkest ``length``-wide window along *axis*.

    Every valid start position (including the last one) is tried; the window
    with the smallest pixel sum wins, the earliest position winning ties.
    Returns 0 when *img* is shorter than *length* along *axis*.
    """
    best_start = 0
    best_sum = None
    for start in range(img.shape[axis] - length + 1):
        if axis == 1:
            window = img[:, start:start + length]
        else:
            window = img[start:start + length, :]
        window_sum = np.sum(window)
        if best_sum is None or window_sum < best_sum:
            best_sum = window_sum
            best_start = start
    return best_start


def _trim_bright_edge(area, mean_gray, edge, max_steps=2):
    """Peel one-pixel strips off one *edge* while that lowers the mean gray.

    Returns ``(trimmed_area, new_mean, strips_removed)``.  ``strips_removed``
    is the number of strips actually peeled (0..max_steps), i.e. the width of
    the blank border found on that edge.
    """
    removed = 0
    for _ in range(max_steps):
        if edge == 'left':
            candidate = area[:, 1:]
        elif edge == 'top':
            candidate = area[1:, :]
        elif edge == 'right':
            candidate = area[:, :-1]
        else:  # 'bottom'
            candidate = area[:-1, :]
        if candidate.size == 0:
            break
        candidate_mean = np.sum(candidate) / candidate.size
        if candidate_mean >= mean_gray:
            # Dropping the strip does not make the area darker: the strip is
            # not blank border, and retrying would give the same answer.
            break
        area = candidate
        mean_gray = candidate_mean
        removed += 1
    return area, mean_gray, removed


def target_areas(ori_img):
    """Locate the filling rectangle of every option block in *ori_img*.

    For each nominal position in ``template_block_pos`` a search window is cut
    from the grayscale image (the block enlarged by 10 px left/right and 5 px
    top/bottom).  Inside it the darkest ``template_block_size`` rectangle is
    found, first horizontally, then vertically.  Finally up to two one-pixel
    strips are peeled from the left, top, right and bottom edges (in that
    order) as long as peeling lowers the mean gray level.

    Args:
        ori_img: image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
            It is not modified.

    Returns:
        A list with one ``(left, top, right, bottom)`` tuple per template
        block, in image coordinates, in the order of ``template_block_pos``.
    """
    gray = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)

    block_width, block_height = template_block_size
    margin_x, margin_y = 10, 5  # search slack around the nominal position
    max_trim = 2                # max blank-border width removed per edge

    result = []
    for tpl_x, tpl_y in template_block_pos:
        # Clamp at 0 so a block near the image border cannot produce a
        # negative index, which would silently wrap around when slicing.
        ex_left = max(tpl_x - margin_x, 0)
        ex_top = max(tpl_y - margin_y, 0)
        ex_right = tpl_x + block_width + margin_x
        ex_bottom = tpl_y + block_height + margin_y
        window_img = gray[ex_top:ex_bottom, ex_left:ex_right]

        # Horizontal search uses the full window height; the vertical search
        # is then restricted to the columns that were just selected.
        h_start = _darkest_window_start(window_img, block_width, axis=1)
        column_band = window_img[:, h_start:h_start + block_width]
        v_start = _darkest_window_start(column_band, block_height, axis=0)

        area = column_band[v_start:v_start + block_height, :]
        mean_gray = np.sum(area) / area.size if area.size else 0.0

        # Remove blank border strips; each count is the number of pixels
        # actually removed from that edge.
        area, mean_gray, left_trim = _trim_bright_edge(area, mean_gray, 'left', max_trim)
        area, mean_gray, top_trim = _trim_bright_edge(area, mean_gray, 'top', max_trim)
        area, mean_gray, right_trim = _trim_bright_edge(area, mean_gray, 'right', max_trim)
        area, mean_gray, bottom_trim = _trim_bright_edge(area, mean_gray, 'bottom', max_trim)

        final_start_x = h_start + left_trim
        final_start_y = v_start + top_trim
        final_end_x = h_start + block_width - right_trim
        final_end_y = v_start + block_height - bottom_trim

        result.append((ex_left + final_start_x,
                       ex_top + final_start_y,
                       ex_left + final_end_x,
                       ex_top + final_end_y))
    return result

def weighted_mean_threshold(gray_roi, gray_f_value, gray_b_value):
    """Return the weighted mean gray level of *gray_roi* as an int.

    Each pixel with gray level ``g`` gets the weight

    * ``0``                                   if ``g >= gray_f_value``
    * ``1 - (g - gray_b_value) / (gray_f_value - gray_b_value)``
                                              if ``gray_b_value <= g < gray_f_value``
    * ``1``                                   if ``g < gray_b_value``

    and the result is ``sum(g * w) / sum(w)`` truncated to an int.

    Args:
        gray_roi: array of gray levels.
        gray_f_value: foreground reference value.
        gray_b_value: background reference value.

    Returns:
        The weighted mean, or 0 when every weight is zero (no pixel is darker
        than ``gray_f_value``), where the plain formula would be 0/0.
    """
    roi = np.asarray(gray_roi, dtype=np.float64)
    span = gray_f_value - gray_b_value
    if span > 0:
        ramp = 1.0 - (roi - gray_b_value) / span
    else:
        # With gray_f_value <= gray_b_value the middle band is empty, so the
        # ramp is never selected; skip the division (by zero when span == 0).
        ramp = np.ones_like(roi)
    weights = np.where(roi >= gray_f_value, 0.0,
                       np.where(roi >= gray_b_value, ramp, 1.0))
    weight_sum = weights.sum()
    if weight_sum <= 0:
        return 0
    return int((roi * weights).sum() / weight_sum)


if __name__ == '__main__':
    # IMREAD_COLOR always yields a 3-channel BGR image, which both cvtColor
    # calls (here and in target_areas) require.
    ori_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if ori_img is None:
        raise FileNotFoundError('cannot read image: ' + img_path)
    display_img = ori_img.copy()

    # imread returns BGR channel order, so convert with BGR2GRAY
    # (the same conversion target_areas uses).
    gray = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
    display_gray = gray.copy()

    valid_filling_ranges = target_areas(ori_img)

    # Binarize with Otsu's automatically selected threshold.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    for lt_x, lt_y, rb_x, rb_y in valid_filling_ranges:
        # Overlay of the detected rectangles; not displayed by this script.
        cv2.rectangle(display_img, (lt_x, lt_y), (rb_x, rb_y), (0, 0, 255), 2)

        # Both are views, so the contour clean-up below edits `binary` too.
        opt_rect_bin = binary[lt_y:rb_y, lt_x:rb_x]
        opt_rect_gray = gray[lt_y:rb_y, lt_x:rb_x]
        if opt_rect_gray.size == 0:
            continue  # degenerate rectangle: nothing to measure

        # [-2] selects the contour list under both the 3-value (OpenCV 3) and
        # the 2-value (OpenCV 4) return signature of findContours.
        contours = cv2.findContours(opt_rect_bin, cv2.RETR_TREE,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Fill contours smaller than 4 px with black.
        for contour in contours:
            if cv2.contourArea(contour) < 4:
                cv2.drawContours(opt_rect_bin, [contour], -1, (0), -1)

        total_pixels = opt_rect_bin.size
        count_white_pixels = int(np.count_nonzero(opt_rect_bin))
        count_black_pixels = total_pixels - count_white_pixels

        rf_bg_rate = count_black_pixels / total_pixels
        rb_bg_rate = count_white_pixels / total_pixels

        print('黑色像素占比：', rf_bg_rate)
        print('白色像素占比：', rb_bg_rate)

        # Pixel proportions rescaled to 0..255 serve as the foreground and
        # background reference gray levels.
        gray_f_value = int(rf_bg_rate * 255)
        gray_b_value = int(rb_bg_rate * 255)

        gmw_pixel_value = weighted_mean_threshold(opt_rect_gray, gray_f_value, gray_b_value)

        print('加权平均阈值：', gmw_pixel_value)

        # Proportion of pixels not brighter than the weighted threshold.
        count_below = int(np.count_nonzero(opt_rect_gray <= gmw_pixel_value))
        gmw_rate = count_below / opt_rect_gray.size

        print('低于加权阈值点像素占比：', gmw_rate)

        # Paint the block with the proportion rescaled to a gray level.
        display_gray[lt_y:rb_y, lt_x:rb_x] = int(gmw_rate * 255)

    img_show(display_gray)

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

image-recognition