# 多线程爬虫
# 2020-12-13 04:49
# 标签:tab 创建 class driver color www 代理 pre else
# threading模块 condition的生产者消费者模式 lock版的生产者消费者模式 queue的线程安全 threading类实现多线程 selenium关闭页面和浏览器 selenium页面等待 selenium打开多个页面和页面间的切换 多线程共享全局变量 selenium设置代理ip
# 多线程爬虫 标签:tab 创建 class driver color www 代理 pre else
# 原文地址:https://www.cnblogs.com/kenD/p/11123543.html
import threading
import time
def coding(times=3, delay=1):
    """Print a "writing code" message repeatedly, pausing after each print.

    Args:
        times: How many messages to print. Defaults to 3.
        delay: Seconds to sleep after each message. Defaults to 1.
    """
    for _ in range(times):
        # current_thread() identifies which thread is executing this call.
        print("正在写代码%s" % threading.current_thread())
        time.sleep(delay)
def drawing(times=3, delay=1):
    """Print a "drawing" message repeatedly, pausing after each print.

    Args:
        times: How many messages to print. Defaults to 3.
        delay: Seconds to sleep after each message. Defaults to 1.
    """
    for _ in range(times):
        # current_thread() identifies which thread is executing this call.
        print("正在画画%s" % threading.current_thread())
        time.sleep(delay)
def main():
    """Start one thread running ``coding`` and one running ``drawing``."""
    # Create and start the child threads; neither is joined here.
    t1 = threading.Thread(target=coding)
    t1.start()
    t2 = threading.Thread(target=drawing)
    t2.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
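# threading.Condition wraps an underlying lock (an RLock by default) and adds
# wait()/notify()/notify_all(); it exposes acquire()/release() but does not
# subclass threading.Lock.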
import threading
import random
import time
# Shared state for the Condition-based producer/consumer demo.
gTotalTimes = 10  # production rounds allowed in total, across all producers
gTimes = 0  # production rounds completed so far
gMoney = 1000  # shared balance: producers add to it, consumers subtract
gCondition = threading.Condition()  # guards the shared state; consumers wait on it
class Producer(threading.Thread):
    """Producer thread for the Condition-based demo.

    Each round adds a random amount to the shared ``gMoney`` balance and
    wakes every thread waiting on ``gCondition``. Stops once ``gTimes``
    reaches ``gTotalTimes``.
    """

    def run(self):
        """Produce until the global round budget is exhausted."""
        global gMoney
        global gTimes
        while True:
            money = random.randint(100, 1000)
            # The with-block releases the condition's lock on every exit
            # path, including the break below.
            with gCondition:
                if gTimes >= gTotalTimes:
                    break
                gMoney += money
                gTimes += 1
                print("%s生产了%d的钱,现在总共有%d" % (threading.current_thread(), money, gMoney))
                # Wake all threads blocked in gCondition.wait().
                gCondition.notify_all()
            # Sleep outside the lock so other threads can run.
            time.sleep(1)
class Consumer(threading.Thread):
    """Consumer thread for the Condition-based demo.

    Each round tries to withdraw a random amount from the shared ``gMoney``
    balance. While the balance is too low it waits on ``gCondition``; it
    exits once the balance is too low and production has finished.
    """

    def run(self):
        """Consume until the balance is insufficient and production is over."""
        global gMoney
        while True:
            money = random.randint(100, 1000)
            with gCondition:
                # Re-check the balance after every wake-up: another consumer
                # may have spent the money first.
                while gMoney < money:
                    # Producers stop when gTimes == gTotalTimes, so the test
                    # must be >=; with > this branch is never taken and the
                    # thread would block in wait() forever.
                    if gTimes >= gTotalTimes:
                        return
                    print("%s,准备消费%d,剩余金额%d,不足!!!" % (threading.current_thread(), money, gMoney))
                    gCondition.wait()
                gMoney -= money
                print("消费者%s,消费了%d,剩余金额%d" % (threading.current_thread(), money, gMoney))
            # Sleep outside the lock so other threads can run.
            time.sleep(1)
def main():
    """Start two Producer threads and three Consumer threads."""
    for x in range(2):
        t = Producer(name="生产者%d" % x)
        t.start()
    for x in range(3):
        t = Consumer(name='消费者%d' % x)
        t.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
import threading
import random
import time
# Shared state for the Lock-based producer/consumer demo.
gTotalTimes = 10  # production rounds allowed in total, across all producers
gTimes = 0  # production rounds completed so far
gMoney = 1000  # shared balance: producers add to it, consumers subtract
gLock = threading.Lock()  # mutex held while reading or updating the values above
class Producer(threading.Thread):
    """Producer thread for the Lock-based demo.

    Each round adds a random amount to the shared ``gMoney`` balance while
    holding ``gLock``. Stops once ``gTimes`` reaches ``gTotalTimes``.
    """

    def run(self):
        """Produce until the global round budget is exhausted."""
        global gMoney
        global gTimes
        while True:
            money = random.randint(100, 1000)
            # The with-block releases gLock on every exit path, including
            # the break below.
            with gLock:
                if gTimes >= gTotalTimes:
                    # Report the configured limit rather than a literal 10.
                    print("已经生产了%d次, 停止生产" % gTotalTimes)
                    break
                gMoney += money
                gTimes += 1
                print("%s生产了%d的钱,现在总共有%d" % (threading.current_thread(), money, gMoney))
            # Sleep outside the lock so other threads can run.
            time.sleep(1)
class Consumer(threading.Thread):
    """Consumer thread for the Lock-based demo.

    Each round tries to withdraw a random amount from the shared ``gMoney``
    balance while holding ``gLock``. Unlike a Condition-based consumer it
    never waits: when the balance is too low it reports that and retries
    after a sleep, or exits if production has finished.
    """

    def run(self):
        """Consume until the balance is insufficient and production is over."""
        global gMoney
        while True:
            money = random.randint(100, 1000)
            # The with-block releases gLock on every exit path, including
            # the break below.
            with gLock:
                if gMoney >= money:
                    gMoney -= money
                    print("消费者%s,消费了%d,还剩有%d" % (threading.current_thread(), money, gMoney))
                elif gTimes >= gTotalTimes:
                    # No more money will arrive, so stop consuming.
                    break
                else:
                    print("余额不足,当前金额是%d, 需要消费的金额是%d" % (gMoney, money))
            # Sleep outside the lock so other threads can run.
            time.sleep(1)
def main():
    """Start two Producer threads and three Consumer threads."""
    for x in range(2):
        t = Producer(name="生产者%d" % x)
        t.start()
    for x in range(3):
        t = Consumer(name='消费者%d' % x)
        t.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
from queue import Queue
import time
import threading
# q.put(2)
# q.put(1)
# q.put(3)
#
# print(q.qsize())
# print(q.full())
# print(q.empty())
# print(q.get())
def set_value(q, interval=3, count=None):
    """Put consecutive integers 0, 1, 2, ... onto ``q``.

    Args:
        q: Queue-like object with a ``put`` method. ``put`` may block if the
            queue is bounded and full.
        interval: Seconds to sleep after each ``put``. Defaults to 3.
        count: Number of items to produce. ``None`` (the default) produces
            forever, as the original demo did.
    """
    index = 0
    while count is None or index < count:
        q.put(index)
        index += 1
        time.sleep(interval)
def get_value(q, count=None):
    """Print items taken from ``q``, one per line.

    Args:
        q: Queue-like object with a ``get`` method. ``get`` blocks while the
            queue is empty.
        count: Number of items to consume. ``None`` (the default) consumes
            forever, as the original demo did.
    """
    taken = 0
    while count is None or taken < count:
        print(q.get())
        taken += 1
def main():
    """Run one thread filling a queue and one thread draining it."""
    # The queue holds at most 4 items.
    q = Queue(4)
    t1 = threading.Thread(target=set_value, args=[q])
    t2 = threading.Thread(target=get_value, args=[q])
    t1.start()
    t2.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
import threading
import time
class CodingThread(threading.Thread):
    """Thread subclass that prints a "writing code" message a few times.

    ``repeat`` and ``interval`` are class-level settings so a subclass or an
    instance can override them without changing ``run``'s signature.
    """

    repeat = 3  # number of messages printed by run()
    interval = 1  # seconds slept after each message

    def run(self):
        """Print the message ``repeat`` times, sleeping ``interval`` seconds after each."""
        for _ in range(self.repeat):
            print("正在写代码%s" % threading.current_thread())
            time.sleep(self.interval)
class DrawingThread(threading.Thread):
    """Thread subclass that prints a "drawing" message a few times.

    ``repeat`` and ``interval`` are class-level settings so a subclass or an
    instance can override them without changing ``run``'s signature.
    """

    repeat = 3  # number of messages printed by run()
    interval = 1  # seconds slept after each message

    def run(self):
        """Print the message ``repeat`` times, sleeping ``interval`` seconds after each."""
        for _ in range(self.repeat):
            print("正在画画%s" % threading.current_thread())
            time.sleep(self.interval)
def main():
    """Start one CodingThread and one DrawingThread."""
    # Create and start the child threads; neither is joined here.
    t1 = CodingThread()
    t1.start()
    t2 = DrawingThread()
    t2.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
from selenium import webdriver
import time
# Demo: open a page, type into a field, then close the page.
# NOTE(review): `executable_path` and `find_element_by_id` are older Selenium
# APIs; confirm they exist in the installed Selenium version.
driver_path = r"G:\Crawler and Data\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
# Locate the input element by its id.
inputTag = driver.find_element_by_id('kw')
inputTag.send_keys('python')
time.sleep(3)
driver.close()  # closes the current page
# driver.quit()  # would close the whole browser
from selenium import webdriver
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Demo: explicit wait for an element to be present.
# NOTE(review): `executable_path` is an older Selenium keyword; confirm it is
# accepted by the installed Selenium version.
driver_path = r"G:\Crawler and Data\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
# Wait up to 10 seconds for an element with this id. The id is deliberately
# wrong, so the wait runs the full 10 seconds and then raises.
# With a correct id the wait returns as soon as the element is found.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, 'shjdkah'))
)
from selenium import webdriver
import time
# Demo: open a second window and switch between windows.
# NOTE(review): `executable_path` is an older Selenium keyword; confirm it is
# accepted by the installed Selenium version.
driver_path = r"G:\Crawler and Data\chromedriver.exe"
driver = webdriver.Chrome(executable_path=driver_path)
driver.get('https://www.baidu.com/')
# Open douban in a new window via JavaScript.
driver.execute_script('window.open("https://www.douban.com/")')
# The driver is still focused on the baidu page at this point.
print(driver.current_url)
print(driver.window_handles)  # handles of all currently open windows
driver.switch_to.window(driver.window_handles[1])  # switch to the new window
print(driver.current_url)
driver.close()  # close the current page
driver.switch_to.window(driver.window_handles[0])  # switch back to the first window
import threading
# Shared state for the global-counter demo.
gLock = threading.Lock()  # mutex protecting VALUE
VALUE = 0  # counter incremented by the worker threads
def add_value(count=1000000):
    """Increment the global ``VALUE`` ``count`` times while holding ``gLock``.

    Args:
        count: Number of increments to perform. Defaults to 1000000.
    """
    global VALUE
    with gLock:
        for _ in range(count):
            VALUE += 1
        # Print while still holding the lock so the reported value is this
        # thread's final total, not one another thread is mid-way through
        # updating.
        print("value,%d" % VALUE)
def main():
    """Start two threads that each run ``add_value``."""
    for x in range(2):
        t = threading.Thread(target=add_value)
        t.start()
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
from selenium import webdriver
# Demo: launch Chrome through an HTTP proxy and fetch a page that echoes the
# caller's IP.
options = webdriver.ChromeOptions()
# Set the proxy. The switch takes the form --proxy-server=<scheme>://<host>:<port>;
# the original text had "-" in place of "=", which is not a valid switch.
options.add_argument("--proxy-server=http://1.197.203.158:9999")
driver_path = r"G:\Crawler and Data\chromedriver.exe"
# NOTE(review): `executable_path` and `chrome_options` are older Selenium
# keywords; confirm they are accepted by the installed Selenium version.
driver = webdriver.Chrome(executable_path=driver_path, chrome_options=options)
driver.get('http://httpbin.org/ip')