python简易的大乐透数据获取及初步分析

2021-02-12 14:19

阅读:532

标签:too   简单   退出   tom   range   爬取   top   book   clear   

该项目从网上爬取并分析彩票数据,为用户查看和初步分析往期数据提供一种简易的工具。

https://github.com/unknowcry/Lottery

  1 # -*- coding: utf-8 -*-
  2 """
  3 @author UKC_unknowcry
  4 @date 2020-4-12
  5 @desc 彩票数据获取
  6 @filename Lottery_data.py
  7 tips:
  8 数据来自:http://kaijiang.500.com
  9 
 10 """
 11 
 12 import requests
 13 import re
 14 import random
 15 import datetime
 16 import threading
 17 import time
 18 from bs4 import BeautifulSoup
 19 from fake_useragent import UserAgent
 20 from concurrent.futures import ThreadPoolExecutor,as_completed
 21 
 22 class Lottery:
 23     """
 24     单一网页数据获取
 25     """
 26     no=None
 27     url=None
 28     header=None
 29     data=None
 30     data_history=None
 31     new_url=None
 32     __threadlock=None
 33     def __init__(self, no=None):
 34         ‘‘‘
 35         :param no:期号
 36         ‘‘‘
 37         if no == None:
 38             self.no=None
 39         else:
 40             self.no=no    
 41         self.url=None
 42         self.header=None
 43         self.data=None
 44         self.data_history=None
 45         self.new_url=http://kaijiang.500.com/dlt.shtml
 46         self.threadlock=threading.Lock()
 47 
 48     def set_header(self):
 49         """
 50         随机生成ip,设置X-Forwarded-For
 51         设置用户代理
 52         :return:
 53         """
 54         if self.header == None:
 55             ua = UserAgent()
 56             ip = {}.{}.{}.{}.format(112, random.randint(64, 68), random.randint(0, 255), random.randint(0, 255))
 57             self.header={
 58                 "X-Forwarded-For": ip,
 59                 "User-Agent":ua.random
 60             }
 61         else:
 62             pass
 63 
 64     def set_url(self):
 65         """
 66         :return:
 67         """
 68         self.url=http://kaijiang.500.com/shtml/dlt/{}.shtml?.format(self.no)
 69 
 70     def get_response(self,url):
 71         """
 72         链接测试
 73         :return: get请求返回的response
 74         """
 75         response = requests.get(url=url, headers=self.header)
 76         return response
 77 
 78     def get_html(self,response):
 79         """
 80         :return: html文档
 81         """
 82         try:
 83             r=response
 84             r.raise_for_status()
 85             charset=re.search(re.compile(rcharset=(\w+)),r.text).group()[8:]
 86             r.encoding=charset
 87             return r.text
 88         except Exception as err:
 89             print(err)
 90             return ‘‘
 91     
 92     def fill_data(self, soup):
 93         """
 94         :param soup:
 95         :return:
 96         """
 97         try:
 98             tableinfo=soup.find(table,kj_tablelist02)
 99             response_no=re.findall(re.compile(r(\d+)),str(tableinfo))[0]
100             if int(response_no) != int(self.no):
101                 raise Exception(期号错误,响应期号{0}不匹配请求期号{1}.format(response_no,self.no))
102             else:
103                 date_l=re.findall(re.compile(r(\d+)年(\d+)月(\d+)日 兑奖截止日期:(\d+)年(\d+)月(\d+)日),str(tableinfo))
104                 date_start=datetime.date(int(date_l[0][0]),int(date_l[0][1]),int(date_l[0][2]))
105                 date_end=datetime.date(int(date_l[0][3]),int(date_l[0][4]),int(date_l[0][5]))
106                 nums=tuple(re.findall(re.compile(r>(\d\d)),str(tableinfo)))
107                 money_l=re.findall(r(\d+(\.\d+)?),str(tableinfo))
108                 sale=money_l[0][0]
109                 jackpot=money_l[1][0]
110                 self.data=tuple((response_no,nums,date_start,date_end,sale,jackpot))
111         except Exception as err:
112             print(err)
113 
114     def get_newno(self):
115         """
116         :return: bool,最新期号
117         """
118         self.set_header()
119         response=self.get_response(self.new_url)
120         if response.status_code != 200:
121             print(error\n,response.status_code,self.url)
122             return False,None
123         else:
124             soup=BeautifulSoup(self.get_html(response),html.parser)
125             tableinfo=soup.find(span,iSelectBox)
126             newno=re.findall(re.compile(r(\d\d\d\d\d)),str(tableinfo))[0]
127             return True,newno
128 
129     def get_nos(self):
130         """
131         :return: 历史期号
132         """
133         self.set_header()
134         response=self.get_response(self.new_url)
135         if response.status_code != 200:
136             print(error\n,response.status_code,self.url)
137         else:
138             soup=BeautifulSoup(self.get_html(response),html.parser)
139             tableinfo=soup.find(span,iSelectBox)
140             nos=re.findall(re.compile(r(\d\d\d\d\d)),str(tableinfo))
141             return nos[1:]
142 
143     def data_single(self,no=None):
144         """
145         :return: bool,no期数据
146         """
147         if no == None:
148             pass
149         else:
150             self.no=no
151         self.set_header()
152         self.set_url()
153         response=self.get_response(self.url)
154         if response.status_code != 200:
155             print(error\n,response.status_code,self.url)
156             return False,None
157         else:
158             soup=BeautifulSoup(self.get_html(response),html.parser)
159             self.fill_data(soup)
160         return True,self.data
161 
162         
class Lottery_multi:
    """
    Fetch the pages of many issues concurrently with a thread pool.
    """
    # Class-level defaults; each is re-initialised per instance in __init__.
    number = None
    data = None
    __threadlock = None  # name-mangled leftover; instances use ``threadlock``
    max_workers = None
    nos = None
    renos = None
    list = None

    def __init__(self, number=None, max_workers=8):
        """
        :param number: how many of the most recent issues to fetch (None = all)
        :param max_workers: thread-pool size, 8 by default
        """
        self.number = number
        self.data = set()
        self.threadlock = threading.Lock()
        self.max_workers = max_workers
        # get_nos may yield nothing when the index page cannot be read.
        self.nos = Lottery().get_nos() or []
        if number is not None:
            self.nos = self.nos[:number]
        self.renos = None
        self.list = None

    def thread_onedata(self, no):
        """
        Worker: fetch one issue and add its record to ``self.data``.

        :param no: issue number
        :return: (issue number, bool success)
        """
        data = Lottery().data_single(no)[1]
        if data is None:
            return no, False
        with self.threadlock:
            self.data.add(data)
        return no, True

    def _fetch_all(self, nos, announce_restart=False):
        """Run thread_onedata for every issue in *nos* and print each outcome."""
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            pending = {}
            for no in nos:
                if announce_restart:
                    print('thread', no, 'restart')
                pending[pool.submit(self.thread_onedata, no)] = no
            for future in as_completed(pending):
                try:
                    no, flag = future.result()
                except Exception as err:
                    # A crashed worker counts as a failed issue so that
                    # check() can still retry it.
                    print(err)
                    no, flag = pending[future], False
                if flag:
                    print('thread', no, 'done')
                else:
                    print('thread', no, 'failed')

    def data_multi(self, number=None):
        """
        Fetch every issue in ``self.nos``, then retry the missing ones once.

        :param number: unused; kept for backward compatibility
        :return: the set of fetched records
        """
        self.data.clear()
        self._fetch_all(self.nos)
        self.check()
        return self.data

    def check_no(self, number=None):
        """
        Work out which requested issues have no record yet.

        ``self.nos`` is left untouched so the check can be repeated.

        :param number: only consider the first *number* issues (None = all)
        :return: list of issue numbers still missing (also stored in
                 ``self.renos``)
        """
        nos = self.nos if number is None else self.nos[:number]
        fetched = {record[0] for record in self.data}
        self.renos = [no for no in nos if no not in fetched]
        print('check_no done')
        return self.renos

    def adddata(self):
        """
        Re-fetch the issues listed in ``self.renos``.

        :return: None
        """
        self._fetch_all(self.renos or [], announce_restart=True)
        print('adddata done')

    def check(self):
        """
        Verify completeness and retry the missing issues once.

        :return: True when every requested issue has a record, else False
        """
        self.check_no(self.number)
        if not self.renos:
            print('no done')
            return True
        print('no', self.renos)
        self.adddata()
        self.check_no(self.number)
        return not self.renos

    def get_list(self):
        """
        :return: the records as a list sorted by issue number, newest first
        """
        self.list = sorted(self.data, key=lambda x: x[0], reverse=True)
        return self.list
276 
if __name__ == "__main__":
    # Demo: fetch the ten most recent issues with five workers, print them
    # newest first, then print the elapsed wall-clock time.
    time_start = time.time()
    fetcher = Lottery_multi(number=10, max_workers=5)
    fetcher.data_multi()
    # get_list() already returns the records sorted newest first.
    data = fetcher.get_list()
    time_end = time.time()
    for index, record in enumerate(data, start=1):
        print(index, record)
    print('time', time_end - time_start)

 

  1 # -*- coding: utf-8 -*-
  2 """
  3 @author UKC_unknowcry
  4 @date 2020-4-18
  5 @desc 简单的彩票分析及ui
  6 @filename Lottery_ui.py
  7 
  8 """
  9 
 10 import os
 11 import sys
 12 import datetime
 13 import tkinter as tk
 14 import tkinter.messagebox
 15 import matplotlib
 16 import matplotlib.pyplot as plt
 17 import configparser
 18 from tkinter import ttk
 19 from Lottery_data import Lottery
 20 from Lottery_data import Lottery_multi
 21 from matplotlib.pyplot import MultipleLocator
 22 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg,NavigationToolbar2Tk
 23 
 24 class window():
 25     """
 26     界面设计
 27     """
 28     root=None
 29     menubar=None
 30     filemenu=None
 31     windowmenu=None
 32     statusbar=None
 33     tabbar=None
 34     tab_new=None
 35     tab_history=None
 36     canvas_number=None
 37     canvas_position=None
 38 
 39     data=None
 40     data_no=0
 41     data_number=0
 42     data_date_start=0
 43     data_date_end=0
 44     data_sale=0
 45     data_jackpot=0
 46     data_query_no=0
 47     data_query_number=0
 48     data_query_date_start=0
 49     data_query_date_end=0
 50     data_query_sale=0
 51     data_query_jackpot=0
 52     data_newno=0
 53 
 54     myfont=Arial
 55 
 56     def __init__(self):
 57         """
 58         切换工作目录为文件目录
 59         获取最新数据
 60         加载组件
 61         """
 62         path=os.path.realpath(__file__)
 63         dirpath=os.path.dirname(path)
 64         os.chdir(dirpath)
 65         self.get_data_new()
 66         self.set_root()
 67         self.set_statusbar()
 68         self.update_statusbar(准备中…)
 69         self.set_menubar()
 70         self.set_notebookbar()
 71         self.update_statusbar()
 72 
 73     #主窗口    
 74     def set_root(self):
 75         self.root=tk.Tk()
 76         self.root.title(超级大乐透)
 77         try:
 78             self.root.iconbitmap(icon.ico)
 79         except:
 80             pass
 81         try:
 82             config=configparser.ConfigParser()
 83             config.read("Lottery_ui.conf")
 84             # with open("Lottery_ui.conf", "r") as conf:
 85                 # alignstr=conf.read()
 86             alignstr=config.get(window,geometry)
 87             self.root.geometry(alignstr)
 88             print(alignstr)
 89         except Exception as err:
 90             print(err)
 91             self.reset_root_geometry()
 92         self.root.resizable(width=True,height=True)
 93         self.root.protocol(WM_DELETE_WINDOW,self.quit)
 94 
 95     ‘‘‘控件‘‘‘
 96     #菜单栏
 97     def set_menubar(self):
 98         self.menubar=tk.Menu(self.root)
 99         self.set_filemenu()
100         self.set_windowmenu()
101         self.root.config(menu=self.menubar)
102 
103     #文件项
104     def set_filemenu(self):
105         self.filemenu=tk.Menu(self.menubar)
106         self.save=tk.Menu(self.filemenu)
107         self.save.add_command(label=按数分析,font=self.myfont,command=self.save_number)
108         self.save.add_command(label=按位分析,font=self.myfont,command=self.save_position)
109         self.filemenu.add_cascade(label=图片另存为,font=self.myfont,menu=self.save)
110         self.filemenu.add_command(label=退出,font=self.myfont,command=self.quit)
111         self.menubar.add_cascade(label=文件,font=self.myfont,menu=self.filemenu)
112 
113     #窗口项
114     def set_windowmenu(self):    
115         self.windowmenu=tk.Menu(self.menubar)
116         self.windowmenu.add_command(label=重置,font=self.myfont,command=self.reset_root_geometry)
117         self.menubar.add_cascade(label=窗口,font=self.myfont,menu=self.windowmenu)
118 
119     #状态栏
120     def set_statusbar(self):
121         self.statusbar=tk.Label(self.root,text=状态,font=self.myfont,bd=1,relief=tk.SUNKEN,anchor=tk.W)
122         self.statusbar.pack(side=tk.BOTTOM,fill=tk.X)
123 
124     #标签页
125     def set_notebookbar(self):
126         self.tab=ttk.Notebook(self.root)
127         self.set_tab_new()
128         self.set_tab_history()
129         self.set_tab_number()
130         self.set_tab_position()
131         self.tab.pack(expand=True,fill=both)
132         self.tab.select(self.tab_new)
133 
134     #标签页-最新数据
135     def set_tab_new(self):
136         self.tab_new=tk.Frame(self.tab,bg=blue)
137         self.tab.add(self.tab_new,text=现在)
138         self.fill_tab_new()
139 
140     #标签页-历史数据查询
141     def set_tab_history(self):
142         self.tab_history=tk.Frame(self.tab,bg=red)
143         self.tab.add(self.tab_history,text=历史)
144         self.fill_tab_history()
145 
146     #标签页-按数分析
147     def set_tab_number(self):
148         self.tab_number=tk.Frame(self.tab)
149         self.tab.add(self.tab_number,text=按数分析)
150         self.fill_tab_number()
151 
152     #标签页-按位分析
153     def set_tab_position(self):
154         self.tab_position=tk.Frame(self.tab)
155         self.tab.add(self.tab_position,text=按位分析)
156         self.fill_tab_position()
157 
158     #小部件
159     #填充最新页
160     def fill_tab_new(self):
161         self.tab_new_no=tk.Label(self.tab_new,text=+str(self.data_no)+,bg=orange,font=(self.myfont, 14))
162         self.tab_new_no.pack(side=top,fill=x)
163         if type(self.data_number)==tuple:
164             self.tab_new_number=tk.Label(self.tab_new,text=开奖号码+{0}-{1}-{2}-{3}-{4}-{5}-{6}.format(self.data_tmp_number[0], 165                 self.data_tmp_number[1],self.data_tmp_number[2],self.data_tmp_number[3],self.data_tmp_number[4], 166                 self.data_tmp_number[5],self.data_tmp_number[6]),bg=pink,font=(self.myfont, 14))
167         else:
168             tkinter.messagebox.showerror(error,数据错误)
169             self.tab_new_number=tk.Label(self.tab_new,text=开奖号码+str(self.data_number),bg=pink,font=(self.myfont, 14))
170         self.tab_new_number.pack(side=top,fill=x)
171         self.tab_new_date_start=tk.Label(self.tab_new,text=开奖日期+str(self.data_date_start),bg=pink,font=(self.myfont, 14))
172         self.tab_new_date_start.pack(side=top,fill=x)
173         self.tab_new_date_end=tk.Label(self.tab_new,text=兑奖截止+str(self.data_date_end),bg=pink,font=(self.myfont, 14))
174         self.tab_new_date_end.pack(side=top,fill=x)
175         self.tab_new_sale=tk.Label(self.tab_new,text=销售金额+str(self.data_sale)+亿,bg=pink,font=(self.myfont, 14))
176         self.tab_new_sale.pack(side=top,fill=x)
177         self.tab_new_jackpot=tk.Label(self.tab_new,text=奖池滚存+str(self.data_jackpot)+亿,bg=pink,font=(self.myfont, 14))
178         self.tab_new_jackpot.pack(side=top,fill=x)
179 
180     #填充历史页
181     def fill_tab_history(self):
182         self.tab_history_query=tk.Frame(self.tab_history,bg=blue)
183         self.tab_history_query.pack(side=top,fill=x)
184         self.tab_history_label=tk.Label(self.tab_history_query,text=输入期号,bd=1,font=self.myfont)
185         self.tab_history_label.pack(side=left)
186         self.tab_history_btn=tk.Button(self.tab_history_query,text=搜索,bd=1,font=self.myfont,command=lambda :self.tab_history_search(self.tab_history_entry.get()))
187         self.tab_history_btn.pack(side=right)
188         self.tab_history_entry=tk.Entry(self.tab_history_query,bd=3,font=self.myfont)
189         self.tab_history_entry.pack(side=left,fill=x)
190         self.tab_history_frame=tk.Frame(self.tab_history,bg=pink)
191         self.tab_history_frame.pack(side=top,fill=x)
192         self.tab_history_no=tk.Label(self.tab_history_frame,text=+str(self.data_query_no)+,bg=orange,font=(self.myfont, 14))
193         self.tab_history_no.pack(side=top,fill=x)
194         self.tab_history_number=tk.Label(self.tab_history_frame,text=开奖号码+str(self.data_query_number),bg=pink,font=(self.myfont, 14))
195         self.tab_history_number.pack(side=top,fill=x)
196         self.tab_history_date_start=tk.Label(self.tab_history_frame,text=开奖日期+str(self.data_query_date_start),bg=pink,font=(self.myfont, 14))
197         self.tab_history_date_start.pack(side=top,fill=x)
198         self.tab_history_date_end=tk.Label(self.tab_history_frame,text=兑奖截止+str(self.data_query_date_end),bg=pink,font=(self.myfont, 14))
199         self.tab_history_date_end.pack(side=top,fill=x)
200         self.tab_history_sale=tk.Label(self.tab_history_frame,text=销售金额+str(self.data_query_sale)+亿,bg=pink,font=(self.myfont, 14))
201         self.tab_history_sale.pack(side=top,fill=x)
202         self.tab_history_jackpot=tk.Label(self.tab_history_frame,text=奖池滚存+str(self.data_query_jackpot)+亿,bg=pink,font=(self.myfont, 14))
203         self.tab_history_jackpot.pack(side=top,fill=x)
204 
205     #填充按数分析
206     def fill_tab_number(self):
207         self.tab_number_query=tk.Frame(self.tab_number,bg=blue)
208         self.tab_number_query.pack(side=top,fill=x)
209         self.tab_number_label=tk.Label(self.tab_number_query,text=最近,bd=1,font=self.myfont)
210         self.tab_number_label.pack(side=left)
211         self.tab_number_btn=tk.Button(self.tab_number_query,text=生成,bd=1,font=self.myfont,command=self.tab_number_search)
212         self.tab_number_btn.pack(side=right)
213         self.tab_number_entry=tk.Entry(self.tab_number_query,bd=3,font=self.myfont)
214         self.tab_number_entry.pack(side=left,fill=x)
215         self.tab_number_label2=tk.Label(self.tab_number_query,text=,bd=1,font=self.myfont)
216         self.tab_number_label2.pack(side=left)
217         self.tab_number_frame=tk.Frame(self.tab_number,bg=pink)
218         self.tab_number_frame.pack(side=top,fill=both)
219 
220     #填充按位分析
221     def fill_tab_position(self):
222         self.tab_position_query=tk.Frame(self.tab_position,bg=blue)
223         self.tab_position_query.pack(side=top,fill=x)
224         self.tab_position_label=tk.Label(self.tab_position_query,text=最近,bd=1,font=self.myfont)
225         self.tab_position_label.pack(side=left)
226         self.tab_position_btn=tk.Button(self.tab_position_query,text=生成,bd=1,font=self.myfont,command=self.tab_position_search)
227         self.tab_position_btn.pack(side=right)
228         self.tab_position_entry=tk.Entry(self.tab_position_query,bd=3,font=self.myfont)
229         self.tab_position_entry.pack(side=left,fill=x)
230         self.tab_position_label=tk.Label(self.tab_position_query,text=,bd=1,font=self.myfont)
231         self.tab_position_label.pack(side=left)
232         self.tab_position_frame=tk.Frame(self.tab_position,bg=pink)
233         self.tab_position_frame.pack(side=top,fill=both)
234 
235     ‘‘‘事件‘‘‘
236 
237     #重置窗口大小和位置
238     def reset_root_geometry(self):
239         width=380
240         height=300
241         screenwidth=self.root.winfo_screenwidth()
242         screenheight=self.root.winfo_screenheight()
243         alignstr={}x{}+{}+{}.format(width,height,int((screenwidth-width)/2),int((screenheight-height)/2))
244         self.root.geometry(alignstr)
245         config=configparser.ConfigParser()
246         config[window]={geometry:{}.format(self.root.geometry())}
247         with open("Lottery_ui.conf"


评论


亲,登录后才可以留言!