寫爬蟲獲取天氣數據


不多說了,直接上代碼,詳情看注釋。

import codecs
import csv
import datetime
import os
import re

import requests

class Weather:
    """Scrape the hourly 7-day forecast of one city from weather.com.cn.

    Typical use::

        weather = Weather(101010100)
        weather.get_html_text()
        weather.get_goal()

    Args:
        city_id: City identifier substituted into the forecast page URL.
        csv_path: CSV file that forecast rows are appended to.
        timeout: Seconds to wait for the HTTP response before giving up.
    """

    # Column titles, written once at the top of a new (or empty) CSV file.
    CSV_HEADER = ['時間', 'num', '天氣', '溫度', '風向', '風力級別', 'num', '采集時間', '城市ID']
    # Captures the value of the "7d" key inside the page's `hour3data` object.
    _SEVEN_DAY_RE = re.compile(r'var hour3data={.*?"7d":(.*?)}')
    # Matches every double-quoted forecast record inside the captured value.
    _RECORD_RE = re.compile(r'".*?"')

    def __init__(self, city_id, csv_path='weather_7day.csv', timeout=10):
        self.city_id = city_id
        self.csv_path = csv_path
        self.timeout = timeout
        self.url = 'http://www.weather.com.cn/weather/{}.shtml'.format(self.city_id)
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
        # Page source; filled in by get_html_text().
        self.html = None

    def get_html_text(self):
        """Download the forecast page and store its source in ``self.html``.

        Returns:
            The page source as text.

        Raises:
            requests.RequestException: On network failure, timeout, or an
                HTTP error status.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(self.url, headers=self.header, timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        self.html = response.text
        return self.html

    def rinse(self, string):
        """Return ``string`` with every square bracket removed."""
        return string.replace('[', '').replace(']', '')

    def _append_rows(self, rows, with_header=False):
        """Append ``rows`` to the CSV file, opening it only once.

        When ``with_header`` is true, the title row is written first, but
        only if the file does not exist yet or is still empty.
        """
        needs_header = with_header and not (
            os.path.exists(self.csv_path) and os.path.getsize(self.csv_path) > 0
        )
        # newline='' lets the csv module control line endings itself.
        with open(self.csv_path, 'a', encoding='utf-8', newline='') as csv_file:
            writer = csv.writer(csv_file)
            if needs_header:
                writer.writerow(self.CSV_HEADER)
            writer.writerows(rows)

    def save_csv(self, data):
        """Append the single row ``data`` to the CSV file."""
        self._append_rows([data])

    def get_goal(self):
        """Extract the 7-day forecast records from the page and store them.

        The page is downloaded first if ``get_html_text`` has not been called.
        Each record is split on commas and extended with the collection time
        and the city ID before being appended to the CSV file.

        Returns:
            The list of rows that were appended (without the title row).

        Raises:
            IndexError: If the page contains no ``hour3data`` "7d" section.
                The exception type is unchanged from the previous behaviour.
        """
        if self.html is None:
            self.get_html_text()

        found = self._SEVEN_DAY_RE.search(self.html)
        if found is None:
            raise IndexError(
                'hour3data "7d" section not found in page for city {}'.format(self.city_id)
            )

        # Flatten the nested per-day lists so the records can be matched in one pass.
        records = self._RECORD_RE.findall(self.rinse(found.group(1)))
        rows = [
            # record[1:-1] drops the surrounding double quotes.
            record[1:-1].split(',') + [datetime.datetime.now(), self.city_id]
            for record in records
        ]
        self._append_rows(rows, with_header=True)
        return rows
"""
在網頁上要獲取的數據是這個樣子的>>>
    var hour3data={"1d":["19日20時,n01,多雲,18℃,西南風,3-4級,0","19日23時,n01,多雲,14℃,西南風,5-6級,0","20日02時,n01,多雲,14℃,西南風,5-6級,0","20日05時,n01,多雲,18℃,西南風,5-6級,0","20日08時,d01,多雲,22℃,西南風,5-6級,3","20日11時,d01,多雲,28℃,西南風,4-5級,3","20日14時,d01,多雲,31℃,西南風,5-6級,3","20日17時,d01,多雲,30℃,西南風,5-6級,3","20日20時,n01,多雲,23℃,西南風,3-4級,0"],"23d":[["21日08時,d07,小雨,16℃,東北風,4-5級,3","21日11時,d07,小雨,15℃,東北風,4-5級,3","21日14時,d07,小雨,9℃,東北風,4-5級,3","21日17時,d07,小雨,15℃,東北風,3-4級,3","21日20時,n08,中雨,10℃,東北風,3-4級,0","21日23時,n01,多雲,7℃,東北風,<3級,0","22日02時,n01,多雲,6℃,東北風,<3級,0","22日05時,n01,多雲,6℃,東北風,3-4級,0"],["22日08時,d01,多雲,12℃,東北風,3-4級,3","22日11時,d01,多雲,15℃,東北風,3-4級,3","22日14時,d01,多雲,16℃,東北風,3-4級,3","22日17時,d01,多雲,16℃,東北風,3-4級,2","22日20時,n01,多雲,15℃,東北風,<3級,0","23日02時,n01,多雲,6℃,東北風,3-4級,0"]],"7d":[["19日20時,n01,多雲,18℃,西南風,3-4級,0","19日23時,n01,多雲,14℃,西南風,5-6級,0","20日02時,n01,多雲,14℃,西南風,5-6級,0","20日05時,n01,多雲,18℃,西南風,5-6級,0"],["20日08時,d01,多雲,22℃,西南風,5-6級,3","20日11時,d01,多雲,28℃,西南風,4-5級,3","20日14時,d01,多雲,31℃,西南風,5-6級,3","20日17時,d01,多雲,30℃,西南風,5-6級,3","20日20時,n01,多雲,23℃,西南風,3-4級,0","20日23時,n01,多雲,20℃,東北風,<3級,0","21日02時,n02,陰,18℃,東北風,3-4級,0","21日05時,n02,陰,16℃,東北風,4-5級,0"],["21日08時,d07,小雨,16℃,東北風,4-5級,3","21日11時,d07,小雨,15℃,東北風,4-5級,3","21日14時,d07,小雨,9℃,東北風,4-5級,3","21日17時,d07,小雨,15℃,東北風,3-4級,3","21日20時,n08,中雨,10℃,東北風,3-4級,0","21日23時,n01,多雲,7℃,東北風,<3級,0","22日02時,n01,多雲,6℃,東北風,<3級,0","22日05時,n01,多雲,6℃,東北風,3-4級,0"],["22日08時,d01,多雲,12℃,東北風,3-4級,3","22日11時,d01,多雲,15℃,東北風,3-4級,3","22日14時,d01,多雲,16℃,東北風,3-4級,3","22日17時,d01,多雲,16℃,東北風,3-4級,2","22日20時,n01,多雲,15℃,東北風,<3級,0","23日02時,n01,多雲,6℃,東北風,3-4級,0"],["23日08時,d01,多雲,11℃,東北風,3-4級,2","23日14時,d01,多雲,16℃,東北風,3-4級,2","23日20時,n01,多雲,10℃,東北風,<3級,0","24日02時,n00,晴,6℃,北風,3-4級,0"],["24日08時,d00,晴,10℃,北風,3-4級,1","24日14時,d00,晴,20℃,北風,3-4級,1","24日20時,n00,晴,16℃,北風,<3級,0","25日02時,n00,晴,8℃,西北風,3-4級,0"],["25日08時,d00,晴,13℃,西北風,3-4級,1","25日14時,d00,晴,20℃,西北風,3-4級,1","25日20時,n00,晴,16℃,西北風,<3級,0","26日02時,n01,多雲,9℃,南風,3-4級,0"],["26日08時,d01,多雲,16℃,南風,3-4級,3","26日14時,d01,多雲,24℃,南風,3-4級,2","26日20時,n01,多雲,19℃,南風,3-4級,0
"]]}
"""
if __name__ == "__main__":
    # Only city ID 101010100 is collected here; add more city/town IDs to the
    # tuple to gather weather data for several cities in one run.
    for city_id in (101010100,):
        weather = Weather(city_id)
        weather.get_html_text()
        weather.get_goal()

"""
保存到本地是這個樣子的>>>
    時間,num,天氣,溫度,風向,風力級別,num,采集時間,城市ID
    20日08時,d00,晴,18℃,南風,<3級,4,2018-04-20 08:07:01.028206,101010100
    20日11時,d01,多雲,23℃,南風,<3級,4,2018-04-20 08:07:01.029206,101010100
    20日14時,d01,多雲,28℃,南風,4-5級,4,2018-04-20 08:07:01.033226,101010100
    20日17時,d01,多雲,26℃,南風,4-5級,2,2018-04-20 08:07:01.036240,101010100
    20日20時,n01,多雲,23℃,南風,3-4級,0,2018-04-20 08:07:01.039248,101010100
    20日23時,n01,多雲,21℃,南風,3-4級,0,2018-04-20 08:07:01.042258,101010100
    21日02時,n02,陰,19℃,南風,<3級,0,2018-04-20 08:07:01.045264,101010100
    21日05時,n07,小雨,15℃,南風,<3級,0,2018-04-20 08:07:01.047778,101010100
    21日08時,d07,小雨,14℃,南風,3-4級,3,2018-04-20 08:07:01.053790,101010100
    ...

"""
 

打開CSV文件。


注意!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系我们删除。



 
粤ICP备14056181号  © 2014-2021 ITdaan.com