我想以下面这张从网上下载的中国地图为基础,来DIY地图可视化功能,即能根据各省(含中国台湾,各直辖市)的某种数据以相应的颜色来显示地图上的各个区域。
首先,以颜色为特征,利用机器学习库sklearn中的KMeans算法可以将地图分成7个区域。
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import pickle
# Load the map image; its shape is unpacked as (rows, columns, channels).
img = plt.imread("chinamap1.jpg")
rows, columns, chanels = img.shape
# One row per pixel, one column per colour channel (use the real channel
# count instead of a hard-coded 3).
flattened = img.reshape((-1, chanels))

k = 7  # 7 clusters: 5 province colours + label text + background
# random_state is pinned so that the cluster indices are the same on every
# run. Later code selects a colour group by index (e.g. groups[1]); without a
# fixed seed that index could point at a different colour each time.
# NOTE(review): check which index holds which colour for this seed.
k_means = KMeans(init='k-means++', n_clusters=k, max_iter=300, n_init=10,
                 random_state=0)
t0 = time.time()
k_means.fit(flattened)
t_batch = time.time() - t0  # wall-clock time spent fitting, in seconds

# One binary (rows x columns) float mask per cluster: 1 where the pixel was
# assigned to cluster i, 0 elsewhere.
groups = [
    (k_means.labels_ == i).astype(float).reshape((rows, columns))
    for i in range(k)
]

# Despite the .txt name this is a binary pickle file.
with open('text.txt', 'wb') as file:
    pickle.dump(groups, file)
我们发现通常一种颜色都由好几个省市共享。如下图中的8个省市就共享绿色。注意,下图中各个子图均有放大和平移。
我们可以用上篇介绍的连通域算法来分割图像。如下面的代码就将8个在原图中显示绿色的省市在图像上分割开来。
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 15 10:15:18 2019
@author: wangsp
"""
import numpy as np
from matplotlib import pyplot as plt
import pickle
# Read back the list of per-cluster binary masks written by the k-means step.
# NOTE: pickle must only be used on files you produced yourself.
with open('text.txt', 'rb') as mask_file:
    groups = pickle.load(mask_file)
print(len(groups))

# Work on the mask stored at index 1.
img = groups[1]
# To inspect the mask visually, uncomment:
# plt.imshow(img)
# plt.show()
def connectedDomain(img):
    """Split a binary image into its 4-connected regions.

    The image is scanned row by row, left to right.  A pixel whose value
    equals 1 joins the region of the pixel directly above it if there is
    one, otherwise the region of the pixel directly to its left, otherwise
    it starts a new region.  When the upper and left neighbours belong to
    different regions, the left region is merged into the upper one and the
    left region's label is removed.

    Parameters
    ----------
    img : 2-D array-like supporting ``img.shape`` and ``img[i, j]``
        Pixels equal to 1 are foreground; everything else is background.

    Returns
    -------
    dict
        Maps an integer label to the set of ``(row, col)`` tuples of that
        region.  Labels are handed out in scan order starting at 0; labels
        absorbed by a merge are absent, so the keys may have gaps.  An image
        without foreground pixels (including one with zero rows) yields ``{}``.
    """
    h, w = img.shape
    domain = dict()  # label -> set of (row, col) pixels
    # Reverse index pixel -> label, so the region of the upper neighbour is
    # found with one lookup instead of searching every region.
    owner = dict()
    label = -1  # last label handed out

    for i in range(h):
        # Label of the pixel immediately to the left; None when that pixel is
        # background or when we are at the start of a row.
        left_label = None
        for j in range(w):
            if img[i, j] != 1:
                left_label = None
                continue

            # For the first row (i - 1 == -1) this lookup simply misses.
            up_label = owner.get((i - 1, j))
            if up_label is not None:
                current = up_label
                if left_label is not None and left_label != up_label:
                    # The current pixel bridges two regions: fold the left
                    # region into the upper one and retire its label.
                    absorbed = domain.pop(left_label)
                    for pixel in absorbed:
                        owner[pixel] = up_label
                    domain[up_label] |= absorbed
            elif left_label is not None:
                current = left_label
            else:
                label += 1
                current = label
                domain[current] = set()

            domain[current].add((i, j))
            owner[(i, j)] = current
            left_label = current
    return domain
domain = connectedDomain(img)

# Sanity check: the number of foreground pixels in the mask should equal the
# total number of pixels collected over all connected regions.
print()
# ndarray.sum() adds up every element; the builtin sum() would only add the
# rows together and print a vector of per-column totals.
print(img.sum())
print(sum([len(value) for key, value in domain.items()]))  # number of 1-pixels
print()

# Sort the regions by area, largest first.
descended = sorted(domain.items(), key=lambda x: len(x[1]), reverse=True)

# Draw the 8 largest regions, one per cell of a 2x4 subplot grid.
for i, item in enumerate(descended[:8], start=1):
    z = np.zeros((img.shape))
    for x, y in item[1]:
        z[x, y] = 1
    plt.subplot(2, 4, i)
    plt.imshow(z)
plt.show()

# NOTE(review): the name order below assumes that, for this particular map
# image, the 8 largest regions sorted by area are these provinces in exactly
# this order - confirm against the figure shown above.
[西藏,黑龙江,甘肃,河南,重庆,海南,中国台湾,北京] = [descended[i][1] for i in range(8)]
strings = "西藏","黑龙江","甘肃","河南","重庆","海南","中国台湾","北京"
# Save each province's pixel-coordinate set to "<name>.db" (a pickle file).
for string, obj in zip(strings, [西藏,黑龙江,甘肃,河南,重庆,海南,中国台湾,北京]):
    with open('%s.db'%string, 'wb') as file:
        pickle.dump(obj, file)
如此,对其余各颜色分组重复上述步骤,并把背景区域的像素坐标同样保存为名为"背景"的文件,我们便将各个省市的像素坐标都保存到了文件:
最后,就可以实现地图数据的可视化了。
加载像素坐标数据:
"""
Created on Mon Dec 16 22:49:10 2019
@author: Administrator
"""
import os
import pickle
# Directory holding one pickle file per region (file name without extension
# is the region name).
path = r"E:\Python36\MyPythonFiles\MyPyQt5\中国地图\provinces"
dic = dict()   # region name -> unpickled pixel-coordinate collection
regions = []   # region names in directory-listing order

for entry in os.listdir(path):
    name, _extension = os.path.splitext(entry)
    regions.append(name)
    # NOTE: pickle must only be used on files you produced yourself.
    with open(os.path.join(path, entry), 'rb') as handle:
        dic[name] = pickle.load(handle)

# The background stays available in `dic` but is not treated as a region
# that receives a data value.
regions.remove("背景")
print(regions)
随机生成各省市的数据(可添加真实数据),并将数据映射到RGB颜色:
from random import random
# Give every region a random value in [0, 1).
# Note: real per-province figures (population, median income, ...) could be
# loaded here instead.
data = {region: random() for region in regions}
#data["背景"]=1
def x2RGB(x, LSL=0, USL=1.0):
    """Map a number onto a blue -> cyan -> green -> yellow -> red colour ramp.

    ``x`` is first normalised to ``(x - LSL) / (USL - LSL)``.  The normalised
    value is then converted piecewise, one quarter of the range at a time:

    * [0, 0.25)   blue to cyan    (green channel rises)
    * [0.25, 0.5) cyan to green   (blue channel falls)
    * [0.5, 0.75) green to yellow (red channel rises)
    * [0.75, 1]   yellow to red   (green channel falls)

    Parameters
    ----------
    x : number
        Value to convert.
    LSL, USL : number
        Lower and upper limits that map to the two ends of the ramp.

    Returns
    -------
    tuple of three ints
        ``(R, G, B)``.  Values above the upper limit give white
        ``(255, 255, 255)``; values that satisfy none of the comparisons
        (below the lower limit, or NaN) give black ``(0, 0, 0)``.
    """
    ratio = (x - LSL) / (USL - LSL)

    if ratio > 1:
        return (255, 255, 255)
    if ratio >= 0.75:
        green = int(255 * (1 - ratio) * 4)
        return (255, green, 0)
    if ratio >= 0.5:
        red = int(255 * (ratio - 0.5) * 4)
        return (red, 255, 0)
    if ratio >= 0.25:
        blue = int(255 * (0.5 - ratio) * 4)
        return (0, 255, blue)
    if ratio >= 0:
        green = int(255 * ratio * 4)
        return (0, green, 255)
    return (0, 0, 0)
from matplotlib import pyplot as plt
import numpy as np
# The original map is read only to obtain the canvas size.
img0 = plt.imread("chinamap1.jpg")
rows, columns, chanels = img0.shape

# Output canvas: 8-bit RGB, initially all black.
img = np.zeros((rows, columns, 3), dtype=np.uint8)

# Fill every region with the colour that corresponds to its data value.
for region in regions:
    rgb = x2RGB(data[region])
    for row, col in dic[region]:
        img[row, col] = rgb

# Finally paint the background white.
bg_rgb = (255, 255, 255)
for row, col in dic["背景"]:
    img[row, col] = bg_rgb
可视化:
# Optional white backdrop underneath the map (disabled):
# bg = np.ones((rows,columns,3))*255
# plt.imshow(bg,zorder=0)

# Draw the recoloured map and open the figure window.
im = plt.imshow(img, zorder=1)
# A colour bar could be attached with: plt.colorbar(im)
plt.show()
完成。
本文分享自 Python可视化编程机器学习OpenCV 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体分享计划 ,欢迎热爱写作的你一起参与!