爬取地理坐标

爬取GPSspg查询网(下面的代码实际请求的是腾讯地图接口 apis.map.qq.com)

import requests, re
from urllib import parse
def query(region):
    """Look up the coordinates of *region* via the apis.map.qq.com JSAPI.

    Sends a POI search request, extracts the first ``pointx`` / ``pointy``
    values from the JSONP response text, prints the result, and returns it.

    Args:
        region: Free-text place name to search for (sent as the ``wd``
            query parameter).

    Returns:
        ``[region, longitude, latitude]`` where longitude and latitude are
        the strings captured from the response (not converted to float).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        IndexError: If the response contains no ``pointx`` / ``pointy``
            field (e.g. the search returned no result).
    """
    header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
    url = 'http://apis.map.qq.com/jsapi?'
    data = {
        'qt': 'poi',
        'wd': region,
        'pn': 0,
        'rn': 10,
        'rich_source': 'qipao',
        'rich': 'web',
        'nj': 0,
        'c': 1,
        'key': 'FBOBZ-VODWU-C7SVF-B2BDI-UK3JE-YBFUS',
        'output': 'jsonp',
        'pf': 'jsapi',
        'ref': 'jsapi',
        'cb': 'qq.maps._svcb3.search_service_0',
    }
    coordinate_url = url + parse.urlencode(data)
    # Without a timeout requests waits indefinitely on a stalled server.
    r = requests.get(coordinate_url, headers=header, timeout=10)
    r.raise_for_status()
    # Raw strings: '\s' in a normal string literal is an invalid escape.
    lon_match = re.search(r'"pointx":\s*"(.+?)"', r.text)
    lat_match = re.search(r'"pointy":\s*"(.+?)"', r.text)
    if lon_match is None or lat_match is None:
        # IndexError is what the previous findall(...)[0] raised; keep the
        # type for existing callers but say what actually went wrong.
        raise IndexError(
            'no pointx/pointy found in response for %r' % (region,))
    result = [region, lon_match.group(1), lat_match.group(1)]
    print(result)
    return result
query('佛山南海')
# Prints: ['佛山南海', '113.142780', '23.028820']

爬取百度地图拾取坐标系统

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re, pandas as pd
def coordinate(site):
    """Look up *site* on the Baidu Maps coordinate picker using Firefox.

    Opens the picker page, submits *site* as the search term, waits for the
    first result entry, then extracts every "坐标:lng,lat" pair from the
    page source and returns the median of each axis.

    Args:
        site: Place name typed into the picker's search box.

    Returns:
        ``[longitude, latitude]`` as the medians of all coordinate pairs
        found in the page source. Both values are NaN if the page contains
        no matching pair.

    Raises:
        selenium.common.exceptions.TimeoutException: If the search box,
            the search button, or the first result does not appear within
            9 seconds.
    """
    # Create the browser driver.
    driver = webdriver.Firefox()
    try:
        driver.get('http://api.map.baidu.com/lbsapi/getpoint/index.html')
        # Explicit wait with a 9 second timeout.
        wait = WebDriverWait(driver, 9)
        # Wait until the search box is present.
        search_box = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '#localvalue')))
        # Wait until the search button is clickable.
        submit = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, '#localsearch')))
        # Type the search term and run the search.
        search_box.clear()
        search_box.send_keys(site)
        submit.click()
        # Wait for the first result entry to show up.
        wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '#no_0')))
        source = driver.page_source
    finally:
        # Always end the session, even when a wait times out; quit() shuts
        # down the whole browser session rather than just the window.
        driver.quit()
    # NOTE(review): the pattern expects ASCII ':' and ',' around the numbers;
    # confirm against the live page markup.
    xy = re.findall(r'坐标:([\d.]+),([\d.]+)', source)
    # Convert to numbers and take the median of each axis.
    df = pd.DataFrame(xy, columns=['longitude', 'latitude'])
    df['longitude'] = pd.to_numeric(df['longitude'])
    df['latitude'] = pd.to_numeric(df['latitude'])
    longitude = df['longitude'].median()
    latitude = df['latitude'].median()
    return [longitude, latitude]
print(coordinate('南海桂城地铁站'))
# Example output: [113.1611575, 23.044811000000003]

使用 webdriver 时,需要把下载的浏览器驱动程序(例如 Firefox 对应的 geckodriver)所在目录加入 PATH 环境变量;另一种方案是将驱动程序放到当前脚本所在目录下。

参考

Update time: 2020-07-19

results matching ""

    No results matching ""