1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
| import requests from bs4 import BeautifulSoup import sqlite3 import re from pyecharts.charts import Line,Page import pyecharts.options as opts from pyecharts.render import make_snapshot from snapshot_selenium import snapshot
# Base address of the monthly weather-history pages; the script entry point
# appends a '<year>10.html' suffix to it before fetching.
url = 'http://www.tianqihoubao.com/lishi/yangling/month/'
def ask_url(url, timeout=10):
    """Fetch *url* and return the response body as text.

    A browser-like User-Agent header is sent with the request.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body, or an empty string when the request
        fails (the error is printed).
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/93.0.4544.0 Safari/537.36 Edg/93.0.933.1"
        ),
    }
    try:
        # Without a timeout a stalled server would block the script forever.
        return requests.get(url, headers=headers, timeout=timeout).text
    except requests.RequestException as err:
        print(err)
        # Always hand back a str: returning the exception object would make
        # the HTML parser fail later with an unrelated-looking error.
        return ""
def analyze_html(url):
    """Download a weather page and split its data table into row fragments.

    The first ``<table class="b">`` on the page is serialised, stripped of
    spaces, newlines and carriage returns, and split on ``</tr>``.

    Args:
        url: Address of the page to download and parse.

    Returns:
        A list of HTML strings, one per table row, without the first
        (header) piece and without the trailing piece that follows the last
        ``</tr>``. The list is empty when the download did not yield text or
        the page contains no matching table.
    """
    html = ask_url(url)
    if not isinstance(html, str):
        # The download helper can hand back a non-text value on failure;
        # there is nothing to parse in that case.
        return []

    table = BeautifulSoup(html, "lxml").find('table', class_="b")
    if table is None:
        return []

    compact = str(table).replace(' ', '').replace('\n', '').replace('\r', '')
    # Slicing (unlike two pops) is safe even when the split yields one piece.
    return compact.split('</tr>')[1:-1]
def draw(url):
    """Build a day/night temperature line chart for one weather page.

    The table rows returned by ``analyze_html(url)`` are scanned for a date
    and a "day/night" temperature pair. The chart is rendered, a PNG
    snapshot named ``"<label>.png"`` is produced through ``make_snapshot``,
    and the chart object is returned.

    Args:
        url: Address of the monthly weather page to plot.

    Returns:
        The configured ``Line`` chart.

    Raises:
        ValueError: If no temperature could be extracted from the page.
    """
    rows = analyze_html(url)

    # The patterns do not change between rows, so compile them once.
    find_temp = re.compile('[\u4e00-\u9fa5]</td><td>(.*?)℃</td><td>')
    find_date = re.compile('[\u4e00-\u9fa5]">(.*?)日</a>')
    find_year = re.compile('[\u4e00-\u9fa5]">(.*?)月')

    year = None
    date_list = []
    temp_day = []
    temp_night = []
    for item in rows:
        date = find_date.findall(item)
        temp = find_temp.findall(item)
        label = find_year.findall(item)
        if not (date and temp and label):
            # Row does not carry a recognisable date/temperature; skip it
            # instead of failing on an empty match list.
            continue

        year = label[0]
        date_list.append(date[0])

        temp_sum = temp[0].replace('℃', '').split('/')
        day_text = temp_sum[0]
        night_text = temp_sum[1] if len(temp_sum) > 1 else ''

        # A missing reading is stored as None so that both series stay the
        # same length as date_list; dropping it would shift every later
        # temperature onto the wrong date.
        if day_text == '':
            print('0空')
            temp_day.append(None)
        else:
            temp_day.append(int(day_text))
        if night_text == '':
            print('1空')
            temp_night.append(None)
        else:
            temp_night.append(int(night_text))

    all_temp = [value for value in temp_day + temp_night if value is not None]
    print(all_temp)
    if not all_temp:
        raise ValueError('no temperature data found at %s' % url)
    max_temp = max(all_temp)
    min_temp = min(all_temp)

    line1 = (
        Line(init_opts=opts.InitOpts(bg_color='#ffffff'))
        .add_xaxis(date_list)
        .add_yaxis(
            '白天',
            temp_day,
            is_smooth=True,
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(symbol="none", x="90%", y="max"),
                    opts.MarkLineItem(symbol="circle", type_="max", name="最高点"),
                ]
            ),
        )
        .add_yaxis(
            '夜晚',
            temp_night,
            is_smooth=True,
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(type_="min", name="最低点"),
                    opts.MarkLineItem(symbol="none", x="90%", y="max"),
                ]
            ),
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(pos_right=0),
            title_opts=opts.TitleOpts(
                title='%s月温度情况' % year,
                subtitle='24H最高温度%s℃,最低温度%s℃\n部分数据存在空值' % (max_temp, min_temp),
                pos_left='center',
            ),
            xaxis_opts=opts.AxisOpts(
                axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
                is_scale=False,
                boundary_gap=False,
                name="日期",
            ),
            yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        )
    )
    make_snapshot(snapshot, line1.render(), "%s.png" % year)
    return line1
if __name__ == '__main__':
    # Script entry point: draw one chart per yearly October page, collect
    # them on a single pyecharts Page and write that page to test.html.
    from time import time

    start = time()
    page = Page()
    # Pages are addressed as '<year>10.html' for the years 2011 through 2021.
    for year in range(2011, 2022):
        askurl = url + str(year) + '10.html'
        print(askurl)
        page.add(draw(askurl))
    page.render("test.html")
    end = time()
    # Report the whole wall-clock duration; it was previously divided by an
    # unrelated constant, which understated the real cost.
    print('Cost {} seconds'.format(end - start))
|