园长

学无止境 知行合一

杨村(杨陵)天气愈发寒冷了,以至于今天已经穿上了保暖裤[捂脸],凭感觉来说是比去年冷的,于是就想爬一下历史的天气数据,再进行可视化(零上温度)。

requirements.txt

1
2
3
4
snapshot_selenium==0.0.2
requests==2.26.0
pyecharts==1.9.0
beautifulsoup4==4.10.0

多线程 th.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import threading
import time
import tianqi
from pyecharts.charts import Line,Page
def single_th():
    """Draw the chart for every URL in ``tianqi.urls``, one after another."""
    # The chart object returned by tianqi.draw is not used here.
    for page_url in tianqi.urls:
        tianqi.draw(page_url)

def multi_th():
    """Draw the chart for every URL in ``tianqi.urls``, one thread per URL.

    All threads are started before any of them is joined, so the function
    returns only after every ``tianqi.draw`` call has finished.
    """
    workers = [
        threading.Thread(target=tianqi.draw, args=(page_url,))
        for page_url in tianqi.urls
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

if __name__ == '__main__':
    # Time the multi-threaded run. To time the sequential variant instead,
    # call single_th() in place of multi_th().
    # NOTE(review): the elapsed time is divided by 5 before it is printed --
    # presumably an average over five URLs; confirm against tianqi.urls.
    began = time.time()
    multi_th()
    elapsed = time.time() - began
    print('Cost {} seconds'.format(elapsed / 5))

功能实现tianqi.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import requests
from bs4 import BeautifulSoup
import sqlite3
import re
from pyecharts.charts import Line,Page
import pyecharts.options as opts
from pyecharts.render import make_snapshot
from snapshot_selenium import snapshot



# Base URL of the monthly history pages; a concrete page is addressed by
# appending "<YYYYMM>.html" to it.
url = 'http://www.tianqihoubao.com/lishi/yangling/month/'

# October pages for 2011 through 2021. th.py iterates over ``tianqi.urls``,
# so the list has to exist at module level.
urls = [url + str(year) + '10.html' for year in range(2011, 2022)]

def ask_url(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text, or an empty string when the request fails or the
        server answers with an HTTP error status (the error is printed).
    """
    headers = {
        # Browser-like User-Agent sent with the request.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4544.0 Safari/537.36 Edg/93.0.933.1",
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        print(err)
        # Always hand back a string so callers can pass the result straight
        # to an HTML parser.
        return ""
    return response.text



def analyze_html(url):
    """Download *url* and return the raw HTML of each table row on the page.

    The first ``<table class="b">`` is serialized, stripped of spaces and
    line breaks, and split on ``</tr>``. The first piece (presumably the
    header row) and the last piece (whatever follows the final ``</tr>``)
    are dropped.

    Parsing uses BeautifulSoup's ``lxml`` parser, so the lxml package must
    be installed.

    Args:
        url: Address of the monthly history page.

    Returns:
        A list of row strings; empty when the download produced no usable
        text or the page contains no matching table.
    """
    html = ask_url(url)
    # A failed download may not yield page text at all.
    if not isinstance(html, str) or not html:
        return []
    soup = BeautifulSoup(html, "lxml")
    table = soup.find('table', class_="b")
    if table is None:
        return []
    compact = str(table).replace(' ', '').replace('\n', '').replace('\r', '')
    # Slicing (rather than two pops) stays safe even for a very short list.
    return compact.split('</tr>')[1:-1]

def _to_int_or_none(text):
    """Convert a temperature string to int; return None when it is empty."""
    return int(text) if text else None


def draw(url):
    """Build a day/night temperature line chart for one history page.

    The rows returned by ``analyze_html(url)`` are parsed for the date and
    the "day/night" temperature pair. The chart is rendered to
    ``<label>.html`` and a snapshot is saved as ``<label>.png``, where
    ``<label>`` is the text captured in front of "月" in the row's date link.

    Args:
        url: Address of the monthly history page.

    Returns:
        The pyecharts ``Line`` chart.

    Raises:
        ValueError: If no temperature data could be extracted from the page.
    """
    # Compile once instead of once per row.
    find_temp = re.compile('[\u4e00-\u9fa5]</td><td>(.*?)℃</td><td>')
    find_date = re.compile('[\u4e00-\u9fa5]">(.*?)日</a>')
    find_year = re.compile('[\u4e00-\u9fa5]">(.*?)月')

    date_list = []    # x axis: one entry per parsed row
    temp_day = []     # daytime temperature, None where missing
    temp_night = []   # night-time temperature, None where missing
    year = None
    for row in analyze_html(url):
        dates = find_date.findall(row)
        temps = find_temp.findall(row)
        labels = find_year.findall(row)
        if not (dates and temps and labels):
            # The row does not contain a date plus temperature; skip it.
            continue
        year = labels[0]
        day_text, _, night_text = temps[0].replace('℃', '').partition('/')
        day_value = _to_int_or_none(day_text)
        night_value = _to_int_or_none(night_text)
        # Some records lack the day or the night value.
        if day_value is None:
            print('0空')
        if night_value is None:
            print('1空')
        # Store None for a missing value so that both series stay aligned
        # with date_list; dropping the value would shift every later point
        # onto the wrong date.
        date_list.append(dates[0])
        temp_day.append(day_value)
        temp_night.append(night_value)

    # All known temperatures of the month, day and night together.
    all_temp = [t for t in temp_day + temp_night if t is not None]
    print(all_temp)
    if year is None or not all_temp:
        raise ValueError('no temperature data found at %s' % url)
    max_temp = max(all_temp)
    min_temp = min(all_temp)

    line1 = (
        Line(init_opts=opts.InitOpts(bg_color='#ffffff'))
        .add_xaxis(date_list)
        .add_yaxis(
            '白天',
            temp_day,
            is_smooth=True,
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(symbol="none", x="90%", y="max"),
                    opts.MarkLineItem(symbol="circle", type_="max", name="最高点"),
                ]
            ),
        )
        .add_yaxis(
            '夜晚',
            temp_night,
            is_smooth=True,
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(type_="min", name="最低点"),
                    opts.MarkLineItem(symbol="none", x="90%", y="max"),
                ]
            ),
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(pos_right=0),
            title_opts=opts.TitleOpts(
                title='%s月温度情况' % year,
                subtitle='24H最高温度%s℃,最低温度%s℃\n部分数据存在空值' % (max_temp, min_temp),
                pos_left='center',
            ),
            xaxis_opts=opts.AxisOpts(
                axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
                is_scale=False,
                boundary_gap=False,
                name="日期",
            ),
            yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        )
    )
    # Render to a per-chart HTML file: the default "render.html" would be
    # shared by every call, so concurrent draws could snapshot each other's
    # page.
    html_path = line1.render("%s.html" % year)
    make_snapshot(snapshot, html_path, "%s.png" % year)
    return line1


# Running this file directly renders every chart into a single HTML page.
if __name__ == '__main__':
    from time import time

    start = time()
    page = Page()
    # October history pages for 2011 through 2021.
    for year in range(2011, 2022):
        askurl = url + str(year) + '10.html'
        print(askurl)
        page.add(draw(askurl))
    page.render("test.html")
    # Report the total wall-clock time spent on all pages.
    print('Cost {} seconds'.format(time() - start))

爬取的部分数据:

实现比较简单:先访问网页获取数据 ask_url(),再对数据进行提取分析 analyze_html(),最后由 draw() 渲染网页或渲染图片。

值得一提的是,本次使用了 pyecharts,跟此前我在学 Flask 与数据可视化中的 ECharts 相似,其官方文档写道:“当数据分析遇上数据可视化时,pyecharts 诞生了。”

从折线堆叠图中可以看到,(我也不知道能看出来啥)



 评论




博客内容遵循 署名-非商业性使用-相同方式共享 4.0 国际 (CC BY-NC-SA 4.0) 协议

本站使用 volantis 作为主题 。