爬取地理坐标
爬取GPSspg查询网(代码实际请求的是腾讯地图 apis.map.qq.com 的 jsapi 接口)
import requests, re
from urllib import parse
def query(region):
    """Look up the coordinates of a place name and print them.

    Sends a POI search for ``region`` to the ``apis.map.qq.com/jsapi``
    endpoint and pulls the first ``pointx`` / ``pointy`` values out of the
    JSONP response text with regular expressions.

    Args:
        region: Search keyword, sent as the ``wd`` query parameter.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        IndexError: If the response text contains no ``pointx`` /
            ``pointy`` value.

    Prints ``[region, longitude, latitude]``; both coordinates are the raw
    strings captured from the response.
    """
    header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
    url = 'http://apis.map.qq.com/jsapi?'
    data = {
        'qt': 'poi',
        'wd': region,
        'pn': 0,
        'rn': 10,
        'rich_source': 'qipao',
        'rich': 'web',
        'nj': 0,
        'c': 1,
        # NOTE(review): API key is hard-coded in source; consider loading it
        # from configuration instead of committing it.
        'key': 'FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS',
        'output': 'jsonp',
        'pf': 'jsapi',
        'ref': 'jsapi',
        'cb': 'qq.maps._svcb3.search_service_0',
    }
    coordinate_url = url + parse.urlencode(data)
    # A timeout keeps the call from hanging forever on a stalled connection.
    r = requests.get(coordinate_url, headers=header, timeout=10)
    r.raise_for_status()
    # Raw strings so that ``\s`` is a regex escape, not a string escape.
    x_match = re.search(r'"pointx":\s*"(.+?)"', r.text)
    y_match = re.search(r'"pointy":\s*"(.+?)"', r.text)
    if x_match is None or y_match is None:
        # Same exception type the original indexing raised, with context.
        raise IndexError('no coordinates found in response for %r' % (region,))
    longitude = x_match.group(1)
    latitude = y_match.group(1)
    print([region, longitude, latitude])
# Example call; the comment below is the output recorded by the author.
query('佛山南海')
# ['佛山南海', '113.142780', '23.028820']
爬取百度地图拾取坐标系统
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re, pandas as pd
def coordinate(site):
    """Search a place on Baidu's coordinate-picker page and return its position.

    Opens the page in Firefox, types ``site`` into the search box, clicks
    the search button, waits for the element ``#no_0`` to appear, and then
    extracts every ``坐标:<x>,<y>`` pair from the page source.

    Args:
        site: Search keyword typed into the page's search box.

    Returns:
        ``[longitude, latitude]``, each the median over all extracted pairs.
        NOTE(review): with no matches the medians are presumably NaN --
        confirm against pandas behaviour.

    Raises:
        selenium.common.exceptions.TimeoutException: If an awaited element
            does not appear within 9 seconds.
    """
    # Create the browser driver.
    driver = webdriver.Firefox()
    try:
        driver.get('http://api.map.baidu.com/lbsapi/getpoint/index.html')
        # Explicit wait with a 9 second timeout.
        wait = WebDriverWait(driver, 9)
        # Wait for the search input box to be present.
        search_box = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '#localvalue')))
        # Wait for the search button to be clickable.
        submit = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, '#localsearch')))
        # Type the keyword and run the search.
        search_box.clear()
        search_box.send_keys(site)
        submit.click()
        # Wait for the first result entry before reading the page.
        wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '#no_0')))
        source = driver.page_source
    finally:
        # Always end the browser session, even when a wait times out;
        # quit() shuts down the driver rather than only closing the window.
        driver.quit()
    # Extract coordinate pairs (raw string so ``\d`` is a regex escape).
    xy = re.findall(r'坐标:([\d.]+),([\d.]+)', source)
    # Convert to numbers and take the median of each column.
    df = pd.DataFrame(xy, columns=['longitude', 'latitude'])
    df['longitude'] = pd.to_numeric(df['longitude'])
    df['latitude'] = pd.to_numeric(df['latitude'])
    longitude = df['longitude'].median()
    latitude = df['latitude'].median()
    return [longitude, latitude]
# Example call; the comment below is the output recorded by the author.
print(coordinate('南海桂城地铁站'))
# [113.1611575, 23.044811000000003]
在使用 webdriver 的时候,需要先下载对应浏览器的驱动程序,并将其所在目录加入 PATH 环境变量;另一种方案是将驱动程序直接放到当前脚本所在目录下。