Scraping KFC Store Information Across All 315 Cities in China with Python

In my twenty-odd years of life, the restaurant I have visited most often is KFC. As a kid, every birthday meant a trip there; now that I am grown, KFC has become my everyday snack. Walking past after work, I will drop in for an order of golden chicken nuggets or a snack platter if I am hungry; walking past in the morning, I will grab a coffee. It is quick, tasty, and filling, and there is always a branch nearby, so the moment hunger strikes, it is the taste of KFC I find myself craving.

Environment

Python 3.6

PyCharm

requests

csv

The general workflow of a crawler

1. Determine the URL to crawl and the headers parameters

2. Send the request — use requests to simulate a browser request and get the response data

3. Parse the data

4. Save the data

Steps

1. Determine the URL to crawl and the headers parameters

Start by scraping the data for Beijing.

import requests
import csv

base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'}
data = {
    'cname': '',
    'pid': '',
    'keyword': '北京',  # search keyword: Beijing (the API expects Chinese city names)
    'pageIndex': '1',  # which page of results to return
    'pageSize': '10',  # stores per page
}

 

2. Send the request — use requests to simulate a browser request and get the response data

response = requests.post(url=base_url, headers=headers, data=data)
json_data = response.json()
# pprint.pprint(json_data)
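
Uncommenting the pprint call shows the structure of the response. Abridged to just the fields this walkthrough touches, it looks roughly like the sketch below; the exact shape is inferred from the keys the code reads, so treat it as illustrative:

# Rough shape of the response JSON (values elided):
# {
#     'Table':  [{'rowcount': ...}],   # total number of matching stores
#     'Table1': [                      # the stores on the current page
#         {'storeName': '...', 'cityName': '...',
#          'addressDetail': '...', 'pro': '...'},
#         ...
#     ]
# }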

 

3. Parse the data

data_list = json_data['Table1']
# pprint.pprint(data_list)
# Loop over the records and pull out the fields we need
for ls in data_list:
    storeName = ls['storeName'] + '餐廳'  # store name ('餐廳' means 'restaurant')
    cityName = ls['cityName']  # city the store is in
    addressDetail = ls['addressDetail']  # street address
    pro = ls['pro']  # store details/services
    # print(storeName, cityName, addressDetail, pro)
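
The direct key lookups above raise KeyError if a record is ever missing a field. A slightly more defensive variant (a suggestion, not the original code) falls back to empty strings:

# Defensive variant: dict.get avoids KeyError on incomplete records
for ls in data_list:
    storeName = ls.get('storeName', '') + '餐廳'
    cityName = ls.get('cityName', '')
    addressDetail = ls.get('addressDetail', '')
    pro = ls.get('pro', '')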

 

4. Save the data

Still inside the loop from step 3, append each record to a CSV file:

    print('Scraping:', storeName)
    with open('data.csv', 'a', newline='', encoding='utf-8') as csvfile:  # newline='' prevents blank rows; encoding='utf-8' keeps the Chinese text intact
        csvwriter = csv.writer(csvfile, delimiter=',')  # delimiter=',' sets the CSV field separator
        csvwriter.writerow([storeName, cityName, addressDetail, pro])  # write one row per store
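
If you want the CSV to carry column names, write a header row once before the loop starts. A small sketch, assuming the file does not exist yet on the first run:

import csv
import os

# Write the header only once, before any data rows are appended
if not os.path.exists('data.csv'):
    with open('data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(['storeName', 'cityName', 'addressDetail', 'pro'])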

 

5. Data for all 315 cities nationwide

The 315 city names are taken from Lagou's city-list endpoint (kept as a comment in the script below); with that list, the same request can be looped over every city.

# coding:utf-8
import requests
import csv
import time
import random

# A pool of free HTTP proxies (long expired by now; swap in live ones before running).
# requests matches proxy keys against the lowercase URL scheme, so the keys
# must be 'http', not 'HTTP', or the proxies are silently ignored.
ip = [{'http': '1.199.31.213:9999'}, {'http': '182.46.197.33:9999'}, {'http': '58.18.133.101:56210'},
      {'http': '175.44.108.123:9999'}, {'http': '123.52.97.90:9999'}, {'http': '182.92.233.137:8118'},
      {'http': '223.242.225.42:9999'}, {'http': '113.194.28.84:9999'}, {'http': '113.194.30.115:9999'},
      {'http': '113.195.19.41:9999'}, {'http': '144.123.69.123:9999'}, {'http': '27.192.168.202:9000'},
      {'http': '163.204.244.179:9999'}, {'http': '112.84.53.197:9999'}, {'http': '117.69.13.69:9999'},
      {'http': '1.197.203.214:9999'}, {'http': '125.108.111.22:9000'}, {'http': '171.35.169.69:9999'},
      {'http': '171.15.173.234:9999'}, {'http': '171.13.103.52:9999'}, {'http': '183.166.97.201:9999'},
      {'http': '60.2.44.182:44990'}, {'http': '58.253.158.21:9999'}, {'http': '47.94.89.87:3128'},
      {'http': '60.13.42.235:9999'}, {'http': '60.216.101.46:32868'}, {'http': '117.90.137.91:9000'},
      {'http': '123.169.164.163:9999'}, {'http': '123.169.162.230:9999'}, {'http': '125.108.119.189:9000'},
      {'http': '163.204.246.68:9999'}, {'http': '223.100.166.3:36945'}, {'http': '113.195.18.134:9999'},
      {'http': '163.204.245.50:9999'}, {'http': '125.108.79.50:9000'}, {'http': '163.125.220.205:8118'},
      {'http': '1.198.73.246:9999'}, {'http': '175.44.109.51:9999'}, {'http': '121.232.194.47:9000'},
      {'http': '113.194.30.27:9999'}, {'http': '129.28.183.30:8118'}, {'http': '123.169.165.73:9999'},
      {'http': '120.83.99.190:9999'}, {'http': '175.42.128.48:9999'}, {'http': '123.101.212.223:9999'},
      {'http': '60.190.250.120:8080'}, {'http': '125.94.44.129:1080'}, {'http': '118.112.195.91:9999'},
      {'http': '110.243.5.163:9999'}, {'http': '118.89.91.108:8888'}, {'http': '125.122.199.13:9000'},
      {'http': '171.11.28.248:9999'}, {'http': '211.152.33.24:39406'}, {'http': '59.62.35.130:9000'},
      {'http': '123.163.96.124:9999'}]
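
Free proxies like these go stale quickly, so it can pay to prune the pool before the crawl starts. A minimal sketch; the probe URL httpbin.org is my assumption, not part of the original script:

def check_proxies(pool, test_url='http://httpbin.org/ip', timeout=3):
    """Return only the proxies that answer within the timeout."""
    alive = []
    for proxy in pool:
        try:
            requests.get(test_url, proxies=proxy, timeout=timeout)
            alive.append(proxy)
        except requests.RequestException:
            pass
    return alive

# ip = check_proxies(ip)  # optionally filter out dead proxies up front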


base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'}


def get_page(keyword):
    """Query the first page for a city and return how many pages of stores it has."""
    data = {
        'cname': '',
        'pid': '',
        'keyword': keyword,
        'pageIndex': '1',
        'pageSize': '10',
    }
    try:
        response = requests.post(url=base_url, headers=headers, data=data)
        json_data = response.json()
        page = json_data['Table'][0]['rowcount']  # total number of stores for this keyword

        # Ceiling division: 10 stores per page
        if page % 10 > 0:
            page_num = page // 10 + 1
        else:
            page_num = page // 10

        return page_num
    except Exception as e:
        print(e)
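
The if/else rounding above is just a manual ceiling division; the standard library expresses the same thing in one line, shown here purely for comparison:

import math

page_num = math.ceil(page / 10)  # identical result to the if/else branch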




def send_request(keyword):
    page_num = get_page(keyword)
    if not page_num:  # the count request failed or the city has no stores
        return

    try:
        for page in range(1, page_num + 1):
            print('============ Fetching page {} =========='.format(page))
            data = {
                'cname': '',
                'pid': '',
                'keyword': keyword,
                'pageIndex': str(page),
                'pageSize': '10',
            }

            response = requests.post(url=base_url, headers=headers, data=data,
                                     proxies=random.choice(ip), timeout=3)

            json_data = response.json()
            # pprint.pprint(json_data)
            time.sleep(0.4)

            # 3. Parse the data
            data_list = json_data['Table1']
            # pprint.pprint(data_list)

            # Loop over the records and pull out the fields we need
            for ls in data_list:
                storeName = ls['storeName'] + '餐廳'  # store name ('餐廳' means 'restaurant')
                cityName = ls['cityName']  # city the store is in
                addressDetail = ls['addressDetail']  # street address
                pro = ls['pro']  # store details/services
                # print(storeName, cityName, addressDetail, pro)

                # 4. Save the data
                print('Scraping:', storeName)
                with open('data5.csv', 'a', newline='', encoding='utf-8') as csvfile:  # newline='' prevents blank rows
                    csvwriter = csv.writer(csvfile, delimiter=',')  # delimiter=',' sets the field separator
                    csvwriter.writerow([storeName, cityName, addressDetail, pro])  # one row per store
            time.sleep(0.2)
    except Exception as e:
        print(e)
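
Reopening data5.csv for every single row works but is wasteful; opening it once per page does the same job with far fewer system calls. An equivalent restructuring of the inner loop (a suggestion, not the original code):

# Open the file once per page instead of once per row
with open('data5.csv', 'a', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    for ls in data_list:
        print('Scraping:', ls['storeName'])
        csvwriter.writerow([ls['storeName'] + '餐廳', ls['cityName'],
                            ls['addressDetail'], ls['pro']])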


if __name__ == '__main__':
    # //www.lagou.com/lbs/getAllCitySearchLabels.json
    all_cities = ['安陽', '安慶', '鞍山', '澳門特別行政區', '安順', '阿勒泰', '安康', '阿克蘇', '阿壩藏族羌族自治州', '阿拉善盟', '北京', '保定', '蚌埠', '濱州',
                  '包頭', '寶雞', '北海', '亳州', '百色', '畢節', '巴中', '本溪', '巴音郭楞', '巴彥淖爾', '博爾塔拉', '保山', '白城', '白山', '成都', '長沙',
                  '重慶', '長春', '常州', '滄州', '赤峰', '郴州', '潮州', '常德', '朝陽', '池州', '滁州', '承德', '昌吉', '楚雄', '崇左', '東莞', '大連',
                  '德州', '德陽', '大慶', '東營', '大同', '達州', '大理', '德宏', '丹東', '定西', '儋州', '迪慶', '鄂州', '恩施', '鄂爾多斯', '佛山',
                  '福州', '阜陽', '撫州', '撫順', '阜新', '防城港', '廣州', '貴陽', '桂林', '贛州', '廣元', '貴港', '廣安', '固原', '甘孜藏族自治州', '杭州',
                  '合肥', '惠州', '哈爾濱', '海口', '呼和浩特', '邯鄲', '衡陽', '湖州', '淮安', '海外', '菏澤', '衡水', '河源', '懷化', '黃岡', '黃石',
                  '黃山', '淮北', '淮南', '葫蘆島', '呼倫貝爾', '漢中', '紅河', '賀州', '河池', '鶴壁', '鶴崗', '海東', '哈密', '濟南', '金華', '嘉興',
                  '濟寧', '江門', '晉中', '吉林', '九江', '揭陽', '焦作', '荊州', '錦州', '荊門', '吉安', '景德鎮', '晉城', '佳木斯', '酒泉', '濟源',
                  '昆明', '開封', '克拉瑪依', '喀什', '蘭州', '臨沂', '廊坊', '洛陽', '柳州', '六安', '聊城', '連雲港', '呂梁', '瀘州', '拉薩', '麗水',
                  '樂山', '龍岩', '臨汾', '漯河', '六盤水', '涼山彝族自治州', '麗江', '婁底', '萊蕪', '遼源', '隴南', '臨夏', '來賓', '綿陽', '茂名', '馬鞍山',
                  '梅州', '牡丹江', '眉山', '南京', '寧波', '南昌', '南寧', '南通', '南陽', '南充', '寧德', '南平', '內江', '莆田', '濮陽', '萍鄉',
                  '平頂山', '盤錦', '攀枝花', '平涼', '普洱', '青島', '泉州', '清遠', '秦皇島', '曲靖', '衢州', '齊齊哈爾', '黔西南', '黔南', '欽州', '黔東南',
                  '慶陽', '七台河', '日照', '深圳', '上海', '蘇州', '瀋陽', '石家莊', '紹興', '汕頭', '宿遷', '商丘', '三亞', '上饒', '宿州', '邵陽',
                  '十堰', '遂寧', '韶關', '三門峽', '汕尾', '隨州', '三沙', '三明', '綏化', '石嘴山', '四平', '朔州', '商洛', '松原', '天津', '太原',
                  '唐山', '台州', '泰安', '泰州', '天水', '通遼', '銅陵', '台灣', '銅仁', '銅川', '鐵嶺', '塔城', '天門', '通化', '武漢', '無錫', '溫州',
                  '濰坊', '烏魯木齊', '蕪湖', '威海', '梧州', '渭南', '吳忠', '烏蘭察布', '文山', '烏海', '西安', '廈門', '徐州', '新鄉', '西寧', '咸陽',
                  '許昌', '邢台', '孝感', '襄陽', '香港特別行政區', '湘潭', '信陽', '忻州', '咸寧', '宣城', '西雙版納', '湘西土家族苗族自治州', '新余', '興安盟',
                  '煙台', '揚州', '銀川', '鹽城', '宜春', '岳陽', '宜昌', '陽江', '玉溪', '玉林', '益陽', '運城', '宜賓', '榆林', '雲浮', '營口', '永州',
                  '延安', '鷹潭', '伊犁', '延邊', '陽泉', '雅安', '鄭州', '珠海', '中山', '株洲', '淄博', '遵義', '湛江', '肇慶', '鎮江', '張家口', '周口',
                  '駐馬店', '漳州', '棗莊', '長治', '昭通', '舟山', '資陽', '張掖', '自貢', '中衛', '張家界']
    for city in all_cities:
        send_request(city)
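
When the run finishes, a quick sanity check on the output file is straightforward, assuming pandas is installed (the column names below simply mirror the writerow order, since the file has no header):

import pandas as pd

df = pd.read_csv('data5.csv', encoding='utf-8',
                 names=['storeName', 'cityName', 'addressDetail', 'pro'])
print(len(df), 'stores scraped')
print(df['cityName'].value_counts().head())  # top cities by store count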
