ofo数据获取及基于arcpy的ArcGIS空间数据处理与分析(二)

ofo数据获取及

基于arcpy的ArcGIS空间数据处理与分析(二)

随着共享单车的不断增加以及其重要性,很多人开始通过共享单车大数据对城市生活进行研究与分析。前段时间做ofo数据分析时一直被数据所困扰,通过学习与借鉴其他学者的研究,利用python对ofo单车数据进行爬取。

相对于其他爬取程序,该程序有以下特点:

1. 本程序利用python2.7进行编写,考虑到arcpy模块使用版本问题;

2. 由于爬取的是高德地图上面的ofo定位数据,所以又将火星坐标系下的定位数据转换为WGS84坐标系下的定位数据;

3. 同时将转换后的数据导出为shp点数据,方便操作与研究。

首先呢,该ofo定位数据爬取程序是通过登录ofo网页爬取数据的,登录地址:https://common.ofo.so/newdist/?Journey。进行爬取前必须获取认证信息token,获取方式如图1所示:

图1 token获取

#!coding=utf-8

from __future__ import division

import requests

import datetime

import threading

import json

import os

import pandas as pd

import numpy as np

import time

import sqlite3

import math

import arcpy

from requests.packages.urllib3.exceptions import InsecureRequestWarning

from requests_toolbelt.multipart.encoder import MultipartEncoder

from concurrent.futures import ThreadPoolExecutor

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class Crawler:

def __init__(self):

self.start_time = datetime.datetime.now()

self.db_name = "file:database?mode=memory&cache=shared"

self.save_path = "./data/" + datetime.datetime.now().strftime("%Y%m%d")

self.file_name = datetime.datetime.now().strftime("%Y%m%d_%H%M") + "_ofo"

self.lock = threading.Lock()

self.total = 0

self.done = 0

self.bikes_count = 0

self.x_pi = 3.14159265358979324 * 3000.0 / 180.0

self.pi = 3.1415926535897932384626 # π

self.a = 6378245.0 # 长半轴

self.ee = 0.00669342162296594323 # 偏心率平方

self.message = os.path.isdir(self.save_path)

def get_nearby_bikes(self, args):

try:

url = "https://san.ofo.so/ofo/Api/nearbyofoCar"

headers = {

'charset': "utf-8",

'Accept': '*/*',

'Accept-Encoding': 'gzip, deflate',

'Accept-Language': 'zh-CN',

'Content-Length': '516',

'Content-Type': 'multipart/form-data; boundary=----ofo-boundary-MC4wOTk1ODUy',

'Host': 'san.ofo.so',

'Origin': 'https://common.ofo.so',

'Referer': 'https://common.ofo.so/newdist/',

'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393'

}

self.request(headers, args, url)

except Exception as ex:

print(ex)

def request(self, headers, args, url):

multipart_encoder = MultipartEncoder(

fields={

"token": "e923c290-0d27-11e7-b9d2-b5e857d3318f",

"source": "0",

"source-version": "9999",

# "lat": "36.103235",

"lat": str(args[0]),

# "lng":"103.709681"

"lng": str(args[1])

# file为路径

},

boundary='----ofo-boundary-MC4wOTk1ODUy'

)

response = requests.request(

"POST", url, headers=headers,

timeout=30,

verify=False,

data=multipart_encoder

)

with self.lock:

with self.connect_db() as c:

try:

decoded = json.loads(response.text)['values']['info']['cars']

self.done += 1

for x in decoded:

self.bikes_count += 1

at = self.gcj02_to_wgs84_lat(x['lng'], x['lat'])#将火星坐标转换为WGS84--纬度

on = self.gcj02_to_wgs84_lng(x['lng'], x['lat'])#将火星坐标转换为WGS84--经度

c.execute("INSERT OR IGNORE INTO ofo VALUES (%d,'%s',%f,%f)" % (

int(time.time()) * 1000, x['carno'], at, on))

timespent = datetime.datetime.now() - self.start_time

percent = self.done / self.total

total = timespent.total_seconds() / percent

print("位置 %s, 未去重单车数量 %s, 进度 %0.2f%%, 速度 %0.2f个/分钟, 总时间 %s, 剩余时间 %s" % (

args, self.bikes_count, percent * 100, self.done / timespent.total_seconds() * 60, total,

total - timespent.total_seconds()))

except Exception as ex:

print(ex)

def connect_db(self):

return sqlite3.connect(self.db_name)

def generate_create_table_sql(self, brand):

return '''CREATE TABLE

(

"bikeId" VARCHAR(12),

lat DOUBLE,

lng DOUBLE,

CONSTRAINT "_bikeId_lat_lon_pk"

PRIMARY KEY (bikeId, lat, lng)

);'''.format(brand)

#创建shp点数据

def CreateFeaturclass(self, savepath, featurename, spatial):

if arcpy.Exists(savepath + '\\' + featurename + '.shp') == False:

arcpy.CreateFeatureclass_management(savepath, featurename, 'POINT', '', '', '', spatial)

else:

pass

#添加字段

def AddField(self, savepath, featurename):

arcpy.AddField_management(savepath + '\\' + featurename + '.shp', 'bikeid', 'TEXT')

arcpy.AddField_management(savepath + '\\' + featurename + '.shp', 'lon', 'TEXT')

arcpy.AddField_management(savepath + '\\' + featurename + '.shp', 'lat', 'TEXT')

#遍历点并添加字段值

def InsertRow(self, savepath, featurename, data):

Insercur = arcpy.InsertCursor(savepath + '\\' + featurename + '.shp')

for value in range(1,len(data)+1):

point = arcpy.Point()

newrow = Insercur.newRow()

point.X = float(data.head(value)['lng'][value-1])

point.Y = float(data.head(value)['lat'][value-1])

newrow.setValue('Id', value)

newrow.setValue('bikeid',data.head(value)['bikeId'][value-1])

newrow.setValue('lon',data.head(value)['lng'][value-1])

newrow.setValue('lat',data.head(value)['lat'][value-1])

pointGeo = arcpy.PointGeometry(point)

newrow.shape = pointGeo

Insercur.insertRow(newrow)

def gcj02_to_wgs84_lng(self, lng1, lat1):

if self.out_of_china(lng1, lat1): # 判断是否在国内

return lng1, lat1

dlng = self.transformlng(lng1 - 105.0, lat1 - 35.0)

radlat = lat1 / 180.0 * self.pi

magic = math.sin(radlat)

magic = 1 - self.ee * magic * magic

sqrtmagic = math.sqrt(magic)

dlng = (dlng * 180.0) / (self.a / sqrtmagic * math.cos(radlat) * self.pi)

mglng = lng1 + dlng

return lng1 * 2 - mglng

def gcj02_to_wgs84_lat(self, lng1, lat1):

if self.out_of_china(lng1, lat1): # 判断是否在国内

return lng1, lat1

dlat = self.transformlat(lng1 - 105.0, lat1 - 35.0)

radlat = lat1 / 180.0 * self.pi

magic = math.sin(radlat)

magic = 1 - self.ee * magic * magic

sqrtmagic = math.sqrt(magic)

dlat = (dlat * 180.0) / ((self.a * (1 - self.ee)) / (magic * sqrtmagic) * self.pi)

mglat = lat1 + dlat

return lat1 * 2 - mglat

def transformlat(self, lng1, lat1):

ret = -100.0 + 2.0 * lng1 + 3.0 * lat1 + 0.2 * lat1 * lat1 + \

0.1 * lng1 * lat1 + 0.2 * math.sqrt(math.fabs(lng1))

ret += (20.0 * math.sin(6.0 * lng1 * self.pi) + 20.0 *

math.sin(2.0 * lng1 * self.pi)) * 2.0 / 3.0

ret += (20.0 * math.sin(lat1 * self.pi) + 40.0 *

math.sin(lat1 / 3.0 * self.pi)) * 2.0 / 3.0

ret += (160.0 * math.sin(lat1 / 12.0 * self.pi) + 320 *

math.sin(lat1 * self.pi / 30.0)) * 2.0 / 3.0

return ret

def transformlng(self, lng1, lat1):

ret = 300.0 + lng1 + 2.0 * lat1 + 0.1 * lng1 * lng1 + \

0.1 * lng1 * lat1 + 0.1 * math.sqrt(math.fabs(lng1))

ret += (20.0 * math.sin(6.0 * lng1 * self.pi) + 20.0 *

math.sin(2.0 * lng1 * self.pi)) * 2.0 / 3.0

ret += (20.0 * math.sin(lng1 * self.pi) + 40.0 *

math.sin(lng1 / 3.0 * self.pi)) * 2.0 / 3.0

ret += (150.0 * math.sin(lng1 / 12.0 * self.pi) + 300.0 *

math.sin(lng1 / 30.0 * self.pi)) * 2.0 / 3.0

return ret

def out_of_china(self, lng1, lat1):

return not (lng1 > 73.66 and lng1 3.86 and lat1

def group_data(self):

print("正在导出数据")

conn = self.connect_db()

self.export_to_shp(conn, "ofo")

def export_to_shp(self, conn, brand):

spRef = arcpy.SpatialReference(4326)

df = pd.read_sql_query("SELECT * FROM %s" % brand, conn, parse_dates=True)

self.CreateFeaturclass(self.save_path, self.file_name, spRef)

self.AddField(self.save_path, self.file_name)

self.InsertRow(self.save_path, self.file_name, df)

print(brand)

print ("去重后数量")

print (len(df))

def start(self, top_lng, top_lat, bottom_lng, bottom_lat):

while True:

self.__init__()

if self.message == False:

os.makedirs(self.save_path)#创建路径

try:

with self.connect_db() as c:

c.execute(self.generate_create_table_sql('ofo'))

except Exception as ex:

print(ex)

pass

executor = ThreadPoolExecutor(max_workers=100)

print("Start")

self.total = 0

offset = 0.002

lat_range = np.arange(float(top_lat), float(bottom_lat), -offset)

for lat in lat_range:

lng_range = np.arange(float(top_lng), float(bottom_lng), offset)

for lon in lng_range:

self.total += 1

executor.submit(self.get_nearby_bikes, (lat, lon))

executor.shutdown()

self.group_data()

#是否继续运行

always_run = False

if not always_run:

break

waittime = 1

print("等待%s分钟后继续运行" % waittime)

time.sleep(waittime * 60)

if __name__ == '__main__':

c = Crawler()

c.start(103.686592, 36.114191, 103.741781, 36.091515)#爬取范围(左上角,右下角经纬度)

print("完成")

最后将导出为shp数据在ArcGIS中展示如图2和图3所示:

图2 定位数据

图3 属性数据

本文基于arcpy的ArcGIS空间数据处理与分析主要体现在shp点数据的创建、属性数据的添加、点数据的遍历。详细代码见CreateFeaturclass、AddField、InsertRow函数。

参考文献

https://github.com/SilverBooker/ofoSpider

本文仅供参考学习,有不到之处,望大家谅解。

GISWareLab

投稿:丁双龙

编辑:许凯峰

指导:刘涛教授

  • 发表于:
  • 原文链接https://kuaibao.qq.com/s/20180823G18SQ600?refer=cp_1026
  • 腾讯「云+社区」是腾讯内容开放平台帐号(企鹅号)传播渠道之一,根据《腾讯内容开放平台服务协议》转载发布内容。

扫码关注云+社区

领取腾讯云代金券