python简易的大乐透数据获取及初步分析
2021-02-12 14:19
标签:too 简单 退出 tom range 爬取 top book clear
该项目从网上爬取并分析彩票数据,为用户查看和初步分析往期数据提供一种简易的工具。
项目地址:https://github.com/unknowcry/Lottery
1 # -*- coding: utf-8 -*-
2 """
3 @author UKC_unknowcry
4 @date 2020-4-12
5 @desc 彩票数据获取
6 @filename Lottery_data.py
7 tips:
8 数据来自:http://kaijiang.500.com
9
10 """
11
12 import requests
13 import re
14 import random
15 import datetime
16 import threading
17 import time
18 from bs4 import BeautifulSoup
19 from fake_useragent import UserAgent
20 from concurrent.futures import ThreadPoolExecutor,as_completed
21
class Lottery:
    """
    Fetch and parse the draw data of a single lottery issue.

    Pages are requested from http://kaijiang.500.com. Failures are reported
    by printing the error and returning a "no data" value instead of raising.
    """
    # Class-level defaults kept for backward compatibility; __init__ sets the
    # per-instance values.
    no = None
    url = None
    header = None
    data = None
    data_history = None
    new_url = None
    __threadlock = None

    def __init__(self, no=None):
        """
        :param no: issue number to query; may also be supplied later through
                   data_single()
        """
        self.no = no
        self.url = None
        self.header = None
        self.data = None
        self.data_history = None
        self.new_url = 'http://kaijiang.500.com/dlt.shtml'
        self.threadlock = threading.Lock()

    def set_header(self):
        """
        Build the request headers once: a random X-Forwarded-For address and
        a random User-Agent. Does nothing when headers are already set.

        :return: None
        """
        if self.header is not None:
            return
        ua = UserAgent()
        ip = '{}.{}.{}.{}'.format(112, random.randint(64, 68), random.randint(0, 255), random.randint(0, 255))
        self.header = {
            "X-Forwarded-For": ip,
            "User-Agent": ua.random,
        }

    def set_url(self):
        """
        Derive the page URL of the issue stored in self.no.

        :return: None
        """
        self.url = 'http://kaijiang.500.com/shtml/dlt/{}.shtml?'.format(self.no)

    def get_response(self, url, timeout=10):
        """
        Send a GET request with the prepared headers.

        :param url: address to request
        :param timeout: seconds to wait before giving up; without a timeout a
                        stalled server would block the calling thread forever
        :return: the response of the GET request
        """
        return requests.get(url=url, headers=self.header, timeout=timeout)

    def get_html(self, response):
        """
        Decode a response into text, using the charset declared in the page
        when there is one.

        :param response: response object returned by get_response()
        :return: the html document, or '' when the response is an error
        """
        try:
            response.raise_for_status()
            # Accept quoted and hyphenated names such as charset="utf-8".
            declared = re.search(r'charset=["\']?([\w-]+)', response.text)
            if declared:
                response.encoding = declared.group(1)
            # Without a declaration, keep the encoding chosen by the library
            # rather than discarding the whole page.
            return response.text
        except Exception as err:
            # Best effort: callers treat an empty document as "no data".
            print(err)
            return ''

    def fill_data(self, soup):
        """
        Parse the result table of an issue page into self.data as
        (issue, numbers, draw date, claim deadline, sale, jackpot).
        On any parsing problem the error is printed and self.data is None.

        :param soup: parsed page; only soup.find(name, css_class) is used
        :return: None
        """
        # Reset first so that a failed parse never exposes the data of a
        # previously parsed issue.
        self.data = None
        try:
            tableinfo = soup.find('table', 'kj_tablelist02')
            text = str(tableinfo)
            response_no = re.findall(r'(\d+)', text)[0]
            if int(response_no) != int(self.no):
                raise Exception('期号错误,响应期号{0}不匹配请求期号{1}'.format(response_no, self.no))
            date_l = re.findall(r'(\d+)年(\d+)月(\d+)日 兑奖截止日期:(\d+)年(\d+)月(\d+)日', text)
            y1, m1, d1, y2, m2, d2 = (int(part) for part in date_l[0])
            date_start = datetime.date(y1, m1, d1)
            date_end = datetime.date(y2, m2, d2)
            nums = tuple(re.findall(r'>(\d\d)', text))
            # NOTE(review): this pattern matches every number in the table,
            # so the first two hits may not be the sale/jackpot amounts -
            # confirm against the live page markup.
            money_l = re.findall(r'(\d+(\.\d+)?)', text)
            sale = money_l[0][0]
            jackpot = money_l[1][0]
            self.data = (response_no, nums, date_start, date_end, sale, jackpot)
        except Exception as err:
            print(err)

    def _fetch_issue_numbers(self):
        """Return the five-digit issue numbers listed on the index page, or None on a failed request."""
        self.set_header()
        try:
            response = self.get_response(self.new_url)
        except requests.RequestException as err:
            print('error\n', err, self.new_url)
            return None
        if response.status_code != 200:
            # Report the address that was actually requested.
            print('error\n', response.status_code, self.new_url)
            return None
        soup = BeautifulSoup(self.get_html(response), 'html.parser')
        tableinfo = soup.find('span', 'iSelectBox')
        return re.findall(r'(\d\d\d\d\d)', str(tableinfo))

    def get_newno(self):
        """
        :return: (True, newest issue number), or (False, None) when the index
                 page cannot be fetched or lists no issue number
        """
        nos = self._fetch_issue_numbers()
        if not nos:
            return False, None
        return True, nos[0]

    def get_nos(self):
        """
        :return: issue numbers listed on the index page without the first
                 match; an empty list when the page cannot be fetched
        """
        nos = self._fetch_issue_numbers()
        if not nos:
            return []
        return nos[1:]

    def data_single(self, no=None):
        """
        Fetch and parse one issue.

        :param no: issue number; when omitted the number already stored in
                   self.no is used
        :return: (False, None) when the request fails, otherwise
                 (True, data) where data is None if the page could not be parsed
        """
        if no is not None:
            self.no = no
        self.data = None
        self.set_header()
        self.set_url()
        try:
            response = self.get_response(self.url)
        except requests.RequestException as err:
            print('error\n', err, self.url)
            return False, None
        if response.status_code != 200:
            print('error\n', response.status_code, self.url)
            return False, None
        soup = BeautifulSoup(self.get_html(response), 'html.parser')
        self.fill_data(soup)
        return True, self.data
161
162
class Lottery_multi:
    """
    Fetch the data of several issues concurrently with a thread pool.
    """
    # Class-level defaults kept for backward compatibility; __init__ sets the
    # per-instance values.
    number = None
    data = None
    __threadlock = None
    max_workers = None
    nos = None
    renos = None
    list = None

    def __init__(self, number=None, max_workers=8):
        """
        :param number: how many issues to fetch; None keeps every issue
                       returned by Lottery().get_nos()
        :param max_workers: size of the thread pool (default 8)
        """
        self.number = number
        self.data = set()
        self.threadlock = threading.Lock()
        self.max_workers = max_workers
        # Guard against a failed index request returning nothing.
        self.nos = Lottery().get_nos() or []
        if number is not None:
            self.nos = self.nos[:number]
        self.renos = None
        self.list = None

    def thread_onedata(self, no):
        """
        Worker: fetch one issue and store its data in self.data.

        :param no: issue number
        :return: (issue number, True when data was stored)
        """
        try:
            data = Lottery().data_single(no)[1]
        except Exception as err:
            # Thread boundary: one failing issue must not abort the batch.
            print(err)
            data = None
        if data is None:
            return no, False
        # The context manager releases the lock even if add() raises.
        with self.threadlock:
            self.data.add(data)
        return no, True

    def _run_batch(self, nos, restart=False):
        """Fetch every issue in nos on the thread pool and print each outcome."""
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            futures = []
            for no in nos:
                if restart:
                    print('thread', no, 'restart')
                futures.append(pool.submit(self.thread_onedata, no))
            for future in as_completed(futures):
                no, flag = future.result()
                print('thread', no, 'done' if flag else 'failed')

    def data_multi(self, number=None):
        """
        Fetch all issues in self.nos, then retry the missing ones once.

        :param number: unused; kept so existing calls keep working
        :return: the set of fetched data tuples
        """
        self.data.clear()
        self._run_batch(self.nos)
        self.check()
        return self.data

    def check_no(self, number=None):
        """
        Work out which issues have no data yet and store them in self.renos.

        :param number: only consider the first `number` issues of self.nos
        :return: list of issue numbers that are still missing
        """
        wanted = self.nos if number is None else self.nos[:number]
        fetched = {int(item[0]) for item in self.data}
        # Build a new list: self.nos must stay intact for later checks, and
        # a fetched issue that is not in the list must not raise.
        missing = [no for no in wanted if int(no) not in fetched]
        self.renos = missing
        print('check_no done')
        return missing

    def adddata(self):
        """Fetch again the issues recorded in self.renos."""
        self._run_batch(self.renos or [], restart=True)
        print('adddata done')

    def check(self):
        """
        Verify that every issue was fetched, retrying the missing ones once.

        :return: True when nothing is missing after the (optional) retry
        """
        self.check_no(self.number)
        if len(self.renos) == 0:
            print('no done')
            return True
        print('no', self.renos)
        self.adddata()
        self.check_no(self.number)
        return len(self.renos) == 0

    def get_list(self):
        """
        :return: the fetched data as a list sorted by issue number, newest first
        """
        self.list = sorted(self.data, key=lambda x: x[0], reverse=True)
        return self.list
274 self.list.sort(key=lambda x: x[0],reverse=True)
275 return self.list
276
if __name__ == "__main__":
    # Demo run: fetch the ten most recent issues with five threads, then
    # print them newest first together with the elapsed time.
    started = time.time()
    fetcher = Lottery_multi(number=10, max_workers=5)
    fetcher.data_multi()
    rows = fetcher.get_list()
    rows.sort(key=lambda row: row[0], reverse=True)
    finished = time.time()
    for position, row in enumerate(rows, start=1):
        print(position, row)
    print('time', finished - started)
1 # -*- coding: utf-8 -*-
2 """
3 @author UKC_unknowcry
4 @date 2020-4-18
5 @desc 简单的彩票分析及ui
6 @filename Lottery_ui.py
7
8 """
9
10 import os
11 import sys
12 import datetime
13 import tkinter as tk
14 import tkinter.messagebox
15 import matplotlib
16 import matplotlib.pyplot as plt
17 import configparser
18 from tkinter import ttk
19 from Lottery_data import Lottery
20 from Lottery_data import Lottery_multi
21 from matplotlib.pyplot import MultipleLocator
22 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg,NavigationToolbar2Tk
23
24 class window():
25 """
26 界面设计
27 """
28 root=None
29 menubar=None
30 filemenu=None
31 windowmenu=None
32 statusbar=None
33 tabbar=None
34 tab_new=None
35 tab_history=None
36 canvas_number=None
37 canvas_position=None
38
39 data=None
40 data_no=0
41 data_number=0
42 data_date_start=0
43 data_date_end=0
44 data_sale=0
45 data_jackpot=0
46 data_query_no=0
47 data_query_number=0
48 data_query_date_start=0
49 data_query_date_end=0
50 data_query_sale=0
51 data_query_jackpot=0
52 data_newno=0
53
54 myfont=‘Arial‘
55
def __init__(self):
    """
    Switch the working directory to the directory of this file,
    fetch the newest data, then load the widgets.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    os.chdir(here)
    self.get_data_new()
    self.set_root()
    self.set_statusbar()
    # Show a "preparing" message while the remaining widgets are built.
    self.update_statusbar('准备中…')
    self.set_menubar()
    self.set_notebookbar()
    self.update_statusbar()
72
73 #主窗口
def set_root(self):
    """
    Create the main window, set its title and icon, and restore the
    geometry saved in Lottery_ui.conf (falling back to the default
    geometry when the saved value cannot be used).
    """
    self.root = tk.Tk()
    self.root.title('超级大乐透')
    try:
        self.root.iconbitmap('icon.ico')
    except tk.TclError:
        # The icon is optional: a missing or unsupported icon file must not
        # stop the window from opening. Other errors are not hidden.
        pass
    try:
        config = configparser.ConfigParser()
        config.read("Lottery_ui.conf")
        alignstr = config.get('window', 'geometry')
        self.root.geometry(alignstr)
        print(alignstr)
    except Exception as err:
        # Missing file/section/option or an invalid geometry string:
        # report it and use the default geometry.
        print(err)
        self.reset_root_geometry()
    self.root.resizable(width=True, height=True)
    self.root.protocol('WM_DELETE_WINDOW', self.quit)
94
95 ‘‘‘控件‘‘‘
96 #菜单栏
def set_menubar(self):
    """Create the menu bar, add its menus and attach it to the main window."""
    self.menubar = tk.Menu(self.root)
    for build_menu in (self.set_filemenu, self.set_windowmenu):
        build_menu()
    self.root.config(menu=self.menubar)
102
103 #文件项
def set_filemenu(self):
    """Build the file menu: a "save picture as" submenu and a quit entry."""
    font = self.myfont
    self.filemenu = tk.Menu(self.menubar)
    self.save = tk.Menu(self.filemenu)
    save_entries = (
        ('按数分析', self.save_number),
        ('按位分析', self.save_position),
    )
    for label, handler in save_entries:
        self.save.add_command(label=label, font=font, command=handler)
    self.filemenu.add_cascade(label='图片另存为', font=font, menu=self.save)
    self.filemenu.add_command(label='退出', font=font, command=self.quit)
    self.menubar.add_cascade(label='文件', font=font, menu=self.filemenu)
112
113 #窗口项
def set_windowmenu(self):
    """Build the window menu with a single entry that resets the geometry."""
    menu = tk.Menu(self.menubar)
    self.windowmenu = menu
    menu.add_command(label='重置', font=self.myfont, command=self.reset_root_geometry)
    self.menubar.add_cascade(label='窗口', font=self.myfont, menu=menu)
118
119 #状态栏
def set_statusbar(self):
    """Create the status bar label and dock it along the bottom of the window."""
    bar = tk.Label(
        self.root,
        text='状态',
        font=self.myfont,
        bd=1,
        relief=tk.SUNKEN,
        anchor=tk.W,
    )
    self.statusbar = bar
    bar.pack(side=tk.BOTTOM, fill=tk.X)
123
124 #标签页
def set_notebookbar(self):
    """Create the notebook, add its four tabs and select the newest-data tab."""
    self.tab = ttk.Notebook(self.root)
    tab_builders = (
        self.set_tab_new,
        self.set_tab_history,
        self.set_tab_number,
        self.set_tab_position,
    )
    for build_tab in tab_builders:
        build_tab()
    self.tab.pack(expand=True, fill='both')
    self.tab.select(self.tab_new)
133
134 #标签页-最新数据
def set_tab_new(self):
    """Add the newest-data tab to the notebook and fill it."""
    page = tk.Frame(self.tab, bg='blue')
    self.tab_new = page
    self.tab.add(page, text='现在')
    self.fill_tab_new()
139
140 #标签页-历史数据查询
def set_tab_history(self):
    """Add the history-query tab to the notebook and fill it."""
    page = tk.Frame(self.tab, bg='red')
    self.tab_history = page
    self.tab.add(page, text='历史')
    self.fill_tab_history()
145
146 #标签页-按数分析
def set_tab_number(self):
    """Add the analysis-by-number tab to the notebook and fill it."""
    page = tk.Frame(self.tab)
    self.tab_number = page
    self.tab.add(page, text='按数分析')
    self.fill_tab_number()
151
152 #标签页-按位分析
def set_tab_position(self):
    """Add the analysis-by-position tab to the notebook and fill it."""
    page = tk.Frame(self.tab)
    self.tab_position = page
    self.tab.add(page, text='按位分析')
    self.fill_tab_position()
157
158 #小部件
159 #填充最新页
def fill_tab_new(self):
    """
    Fill the newest-data tab with one label per field: issue number,
    winning numbers, draw date, claim deadline, sale amount and jackpot.
    An error dialog is shown when the winning numbers are not a tuple.
    """
    def add_row(text, bg='pink'):
        # Every row is a full-width label stacked from the top.
        label = tk.Label(self.tab_new, text=text, bg=bg, font=(self.myfont, 14))
        label.pack(side='top', fill='x')
        return label

    self.tab_new_no = add_row('第' + str(self.data_no) + '期', bg='orange')
    if isinstance(self.data_number, tuple):
        # NOTE(review): the original formatted self.data_tmp_number, which is
        # not declared among the class attributes visible here; it is still
        # preferred when present, otherwise the checked tuple is used.
        numbers = getattr(self, 'data_tmp_number', self.data_number)
        # Joining avoids an IndexError when fewer than seven numbers exist.
        number_text = '开奖号码' + '-'.join(str(n) for n in numbers[:7])
    else:
        tkinter.messagebox.showerror('error', '数据错误')
        number_text = '开奖号码' + str(self.data_number)
    self.tab_new_number = add_row(number_text)
    self.tab_new_date_start = add_row('开奖日期' + str(self.data_date_start))
    self.tab_new_date_end = add_row('兑奖截止' + str(self.data_date_end))
    self.tab_new_sale = add_row('销售金额' + str(self.data_sale) + '亿')
    self.tab_new_jackpot = add_row('奖池滚存' + str(self.data_jackpot) + '亿')
179
180 #填充历史页
def fill_tab_history(self):
    """
    Fill the history tab: a query bar (caption, search button, entry)
    followed by one label per field of the queried issue.
    """
    font = self.myfont
    big_font = (font, 14)

    # Query bar.
    query = tk.Frame(self.tab_history, bg='blue')
    self.tab_history_query = query
    query.pack(side='top', fill='x')
    self.tab_history_label = tk.Label(query, text='输入期号', bd=1, font=font)
    self.tab_history_label.pack(side='left')
    # The entry is created after the button, so it is looked up when the
    # button is pressed, not when the callback is defined.
    self.tab_history_btn = tk.Button(
        query, text='搜索', bd=1, font=font,
        command=lambda: self.tab_history_search(self.tab_history_entry.get()))
    self.tab_history_btn.pack(side='right')
    self.tab_history_entry = tk.Entry(query, bd=3, font=font)
    self.tab_history_entry.pack(side='left', fill='x')

    # Result area.
    result = tk.Frame(self.tab_history, bg='pink')
    self.tab_history_frame = result
    result.pack(side='top', fill='x')
    rows = (
        ('tab_history_no', '第' + str(self.data_query_no) + '期', 'orange'),
        ('tab_history_number', '开奖号码' + str(self.data_query_number), 'pink'),
        ('tab_history_date_start', '开奖日期' + str(self.data_query_date_start), 'pink'),
        ('tab_history_date_end', '兑奖截止' + str(self.data_query_date_end), 'pink'),
        ('tab_history_sale', '销售金额' + str(self.data_query_sale) + '亿', 'pink'),
        ('tab_history_jackpot', '奖池滚存' + str(self.data_query_jackpot) + '亿', 'pink'),
    )
    for attr, text, colour in rows:
        label = tk.Label(result, text=text, bg=colour, font=big_font)
        setattr(self, attr, label)
        label.pack(side='top', fill='x')
204
205 #填充按数分析
def fill_tab_number(self):
    """
    Fill the analysis-by-number tab: a query bar (caption, generate
    button, entry, unit caption) and an empty frame below it.
    """
    font = self.myfont
    bar = tk.Frame(self.tab_number, bg='blue')
    self.tab_number_query = bar
    bar.pack(side='top', fill='x')

    caption = tk.Label(bar, text='最近', bd=1, font=font)
    self.tab_number_label = caption
    caption.pack(side='left')

    button = tk.Button(bar, text='生成', bd=1, font=font, command=self.tab_number_search)
    self.tab_number_btn = button
    button.pack(side='right')

    entry = tk.Entry(bar, bd=3, font=font)
    self.tab_number_entry = entry
    entry.pack(side='left', fill='x')

    unit = tk.Label(bar, text='期', bd=1, font=font)
    self.tab_number_label2 = unit
    unit.pack(side='left')

    area = tk.Frame(self.tab_number, bg='pink')
    self.tab_number_frame = area
    area.pack(side='top', fill='both')
219
220 #填充按位分析
def fill_tab_position(self):
    """
    Fill the analysis-by-position tab: a query bar (caption, generate
    button, entry, unit caption) and an empty frame below it.
    """
    self.tab_position_query = tk.Frame(self.tab_position, bg='blue')
    self.tab_position_query.pack(side='top', fill='x')
    self.tab_position_label = tk.Label(self.tab_position_query, text='最近', bd=1, font=self.myfont)
    self.tab_position_label.pack(side='left')
    self.tab_position_btn = tk.Button(self.tab_position_query, text='生成', bd=1, font=self.myfont, command=self.tab_position_search)
    self.tab_position_btn.pack(side='right')
    self.tab_position_entry = tk.Entry(self.tab_position_query, bd=3, font=self.myfont)
    self.tab_position_entry.pack(side='left', fill='x')
    # The unit caption gets its own attribute (as in fill_tab_number) so it
    # no longer overwrites the reference to the first caption.
    self.tab_position_label2 = tk.Label(self.tab_position_query, text='期', bd=1, font=self.myfont)
    self.tab_position_label2.pack(side='left')
    self.tab_position_frame = tk.Frame(self.tab_position, bg='pink')
    self.tab_position_frame.pack(side='top', fill='both')
234
235 ‘‘‘事件‘‘‘
236
237 #重置窗口大小和位置
238 def reset_root_geometry(self):
239 width=380
240 height=300
241 screenwidth=self.root.winfo_screenwidth()
242 screenheight=self.root.winfo_screenheight()
243 alignstr=‘{}x{}+{}+{}‘.format(width,height,int((screenwidth-width)/2),int((screenheight-height)/2))
244 self.root.geometry(alignstr)
245 config=configparser.ConfigParser()
246 config[‘window‘]={‘geometry‘:‘{}‘.format(self.root.geometry())}
247 with open("Lottery_ui.conf"
上一篇:VBA方法属性
下一篇:java粗略版图书管理系统
文章标题:python简易的大乐透数据获取及初步分析
文章链接:http://soscw.com/index.php/essay/54474.html