# Python实践 | 亿级经纬度距离计算工具V2

```python
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt, ceil
import math
import time
```

def geodistance(lng1, lat1, lng2, lat2):
    """Return the great-circle distance in metres between two points.

    Uses the haversine formula on a sphere whose radius is the mean Earth
    radius (6371 km).

    Args:
        lng1: Longitude of the first point, in degrees. Any value accepted
            by ``float`` (number or numeric string).
        lat1: Latitude of the first point, in degrees.
        lng2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.

    Returns:
        The distance in metres as a float rounded to the nearest whole metre.

    Raises:
        ValueError, TypeError: If an argument cannot be converted by ``float``.
    """
    # Convert degrees to radians.
    lng1, lat1, lng2, lat2 = map(
        radians, (float(lng1), float(lat1), float(lng2), float(lat2))
    )
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise
    # "math domain error". Plain comparisons (rather than min/max) are used
    # so that a NaN input still propagates as NaN instead of becoming 0.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    distance = 2 * asin(sqrt(a)) * 6371 * 1000  # mean Earth radius: 6371 km
    return round(distance, 0)

`pandas`分别导入源表和目标表，两个表关联得到原点与目标点的所有配对

# Paths of the source (origin) table and the target table.
file_name = r'D:\python\geo\sTable.csv'
file_name2 = r'D:\python\geo\tTable.csv'
# NOTE(review): the statements that load df1/df2 are missing from the published
# listing (its line numbers skip 2 and 4). Presumably each table is read with
# pd.read_csv as below -- confirm against the original script.
df1 = pd.read_csv(file_name)
df2 = pd.read_csv(file_name2)
# Cross join: every df1 row is repeated len(df2) times (sort_index groups the
# copies of one row together) and df2 is tiled len(df1) times, so placing the
# two frames side by side yields every origin/target pairing.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
m = pd.concat(
    [
        pd.concat([df1] * len(df2)).sort_index().reset_index(drop=True),
        pd.concat([df2] * len(df1)).reset_index(drop=True),
    ],
    axis=1,
)

# Cheap bounding-box prefilter: keep only the pairs whose longitude gap, and
# then whose latitude gap, is below the given threshold.
lon_gap = (m.lon - m.lon2).abs()
x = m[lon_gap < diff_lon]
lat_gap = (x.lat - x.lat2).abs()
n = x[lat_gap < diff_lat]

# Work on a copy so that adding a column does not write into a slice of the
# filtered frame (avoids chained assignment).
nn = n.copy()
# Build the column from a plain list instead of DataFrame.apply(axis=1):
# on a frame with zero rows apply returns a DataFrame rather than a Series,
# which cannot be assigned to a single column. Iterating the four columns in
# lockstep also avoids constructing a Series object for every row.
nn['distance'] = [
    geodistance(lon, lat, lon2, lat2)
    for lon, lat, lon2, lat2 in zip(nn['lon'], nn['lat'], nn['lon2'], nn['lat2'])
]

# Keep only the pairs whose distance is within the threshold and add them to
# the running result. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in pandas 2.0.
distance = pd.concat([distance, nn[nn.distance <= minx_mile]])

# Number of slices needed so that the amount of work per slice
# (count_a * count_b spread over the slices) is capped at 10 million.
# NOTE(review): count_a / count_b are presumably the row counts of the source
# and target tables -- confirm where they are defined.
pair_budget = 10000000
pieces = ceil(count_a * count_b / pair_budget)

# Number of lines written to each slice file.
# max(pieces, 1) guards against pieces == 0 (the case when either table is
# empty), which would otherwise raise ZeroDivisionError here.
# NOTE(review): the +1 presumably leaves room for the header line -- confirm.
linesPerFile = ceil(count_a / max(pieces, 1)) + 1

# Split the lines held in `csv_file` into numbered part files named
# '<file_name without its last 4 characters>_<n>.csv'.
# After the loop `filecount` is one more than the number of files written.
filecount = 1
base_name = file_name[:-4]
# Step through the lines one whole slice at a time rather than line by line:
# fast, but the complete line list has to fit in memory.
for start in range(0, len(csv_file), linesPerFile):
    part_path = base_name + '_' + str(filecount) + '.csv'
    chunk = csv_file[start:start + linesPerFile]
    # Open the part file for writing; it is created if it does not exist.
    with open(part_path, 'w+') as part_file:
        # The first slice already begins with csv_file[0] (the header row);
        # every later part needs that row written explicitly first.
        if filecount > 1:
            part_file.write(csv_file[0])
        # Write the whole slice in one call.
        part_file.writelines(chunk)
    # One file finished: advance the file number.
    filecount += 1

# Accumulator for every origin/target pair that lies within the threshold.
distance = pd.DataFrame(columns=('name', 'lon', 'lat', 'name2', 'lon2', 'lat2', 'distance'))
for i in range(1, filecount):
    # Load one numbered slice of the source table.
    df_temp = pd.read_csv(file_name[:-4] + '_' + str(i) + '.csv')
    # Cross join the slice with the target table: each slice row is repeated
    # len(df2) times and df2 is tiled len(df_temp) times.
    # `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
    m = pd.concat(
        [
            pd.concat([df_temp] * len(df2)).sort_index().reset_index(drop=True),
            pd.concat([df2] * len(df_temp)).reset_index(drop=True),
        ],
        axis=1,
    )
    # Cheap bounding-box prefilter on longitude, then latitude.
    x = m[abs(m.lon - m.lon2) < diff_lon]
    n = x[abs(x.lat - x.lat2) < diff_lat]
    # Copy before adding a column to avoid chained assignment.
    nn = n.copy()
    # A list built by iterating the columns in lockstep is used instead of
    # DataFrame.apply(axis=1), which returns a DataFrame (not a Series) when
    # the frame has no rows and then cannot be assigned to one column.
    nn['distance'] = [
        geodistance(lon, lat, lon2, lat2)
        for lon, lat, lon2, lat2 in zip(nn['lon'], nn['lat'], nn['lon2'], nn['lat2'])
    ]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    distance = pd.concat([distance, nn[nn.distance <= minx_mile]])
# Write the combined result once, after every slice has been processed.
# NOTE(review): the published listing lost its indentation; writing after the
# loop produces the same final file as writing on every iteration.
distance.to_csv('D:/python/geo/distance_result.csv')

