python爬虫:爬取京东商品信息
2020-12-13 05:01
标签:close odi selenium css common enc send format windows
原文地址:https://www.cnblogs.com/Auraro997/p/11128158.html

'''
初级版
'''
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Basic version: search JD for one keyword and append every product found on
# the first result page to jd.txt.
# NOTE(review): the find_element_by_* helpers and the positional driver path
# are the Selenium 3 API; they were removed in later Selenium 4 releases.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')

# Text layout of one saved record; filled by str.format() below.
record_template = (
    '\n'
    'num={}\n'
    '商品名称:{}\n'
    '商品链接:{}\n'
    '商品价格:{}\n'
    '商品的评价条数:{}\n'
    '\n'
    '\n'
)

num = 1  # 1-based sequence number stored with each record
try:
    # Element lookups retry for up to 10 seconds before raising.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the element with id "key" (presumably the JD
    # search input) and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)  # give the result page time to render

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name
        good_name = good.find_element_by_css_selector('.p-name em').text
        print(good_name)
        # Product link
        good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
        print(good_url)
        # Product price
        good_price = good.find_element_by_class_name('p-price').text
        print(good_price)
        # Product review text
        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = record_template.format(
            num, good_name, good_url, good_price, good_commit
        )
        print(good_content)
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        # Advance the counter so records are numbered 1, 2, 3, ... instead of
        # all being written as num=1.
        num += 1
    print('商品信息写入成功!')
finally:
    # quit() ends the WebDriver session and stops the driver process;
    # close() would only close the current window.
    driver.quit()
'''
终极版
'''
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Ultimate version: like the basic script, but scrolls down first so that
# lazily loaded products are present, numbers the records, and finally clicks
# the "next page" control.
# NOTE(review): the find_element_by_* helpers and the positional driver path
# are the Selenium 3 API; they were removed in later Selenium 4 releases.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')

# Text layout of one saved record; filled by str.format() below.
record_template = (
    '\n'
    'num={}\n'
    '商品名称:{}\n'
    '商品链接:{}\n'
    '商品价格:{}\n'
    '商品的评价条数:{}\n'
    '\n'
    '\n'
)

num = 1  # 1-based sequence number stored with each record
try:
    # Element lookups retry for up to 10 seconds before raising.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the element with id "key" (presumably the JD
    # search input) and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down 5000px. The JavaScript global is `window`; the original
    # `windows` is undefined in the browser and makes execute_script raise.
    js_code = '''
    window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)
    # Wait 5 s for the product data to load after scrolling.
    time.sleep(5)

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name
        good_name = good.find_element_by_css_selector('.p-name em').text
        print(good_name)
        # Product link
        good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
        print(good_url)
        # Product price
        good_price = good.find_element_by_class_name('p-price').text
        print(good_price)
        # Product review text
        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = record_template.format(
            num, good_name, good_url, good_price, good_commit
        )
        print(good_content)
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1
    print('商品信息写入成功!')

    # Move to the next result page. click must be *called*; a bare
    # `next_tag.click` only references the method and does nothing.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
    time.sleep(10)
finally:
    # quit() ends the WebDriver session and stops the driver process;
    # close() would only close the current window.
    driver.quit()
'''
狂暴版
'''
from selenium import webdriver
from selenium.webdriver.common.keys import Keys # 键盘按键操作
import time
#
def get_good(driver):
    """Scrape product records from the current result page and all following pages.

    For each page the function scrolls down 5000px, reads every element with
    class ``gl-item`` and appends one formatted record per product to
    ``京东商品信息爬取.txt`` (each record is also printed). It then clicks the
    first element with class ``pn-next`` and repeats.

    Pagination stops when a page yields no products, when a page lists exactly
    the same product links as the previous one (the click did not advance), or
    when no ``pn-next`` element exists.

    Args:
        driver: A Selenium WebDriver already showing a search result page.
            The driver is NOT closed here; the caller that created it is
            responsible for closing it, so it is never closed twice.

    Returns:
        None.
    """
    # NOTE(review): the find_element(s)_by_* helpers are the Selenium 3 API;
    # they were removed in later Selenium 4 releases.
    record_template = (
        '\n'
        'num:{}\n'
        '商品名称:{}\n'
        '商品链接:{}\n'
        '商品价格:{}\n'
        '商品评论:{}\n'
        '\n'
        '\n'
    )

    num = 1  # running record number, continues across pages
    previous_urls = None  # product links of the previously saved page

    # Iterate instead of recursing: one recursion level per page would reset
    # the counter on every page and could never terminate normally.
    while True:
        time.sleep(5)
        # Scroll down 5000px so lazily loaded products are rendered.
        driver.execute_script('window.scrollTo(0,5000)')
        time.sleep(5)  # wait for the product data to load

        page_urls = []
        page_records = []
        for good in driver.find_elements_by_class_name('gl-item'):
            good_name = good.find_element_by_css_selector('.p-name em').text
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            good_price = good.find_element_by_class_name('p-price').text
            good_commit = good.find_element_by_class_name('p-commit').text
            page_urls.append(good_url)
            page_records.append((good_name, good_url, good_price, good_commit))

        # Nothing to save, or the same page again: pagination is exhausted.
        if not page_records or page_urls == previous_urls:
            break
        previous_urls = page_urls

        # Save this page's records.
        with open('京东商品信息爬取.txt', 'a', encoding='utf-8') as f:
            for record in page_records:
                good_content = record_template.format(num, *record)
                print(good_content)
                f.write(good_content)
                num += 1

        # Find the "next page" control and click it; stop if there is none.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tags[0].click()
        time.sleep(5)
# Script entry point: open JD, search for one keyword and hand the result
# page to get_good() for scraping.
if __name__ == '__main__':
    driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
    try:
        # Element lookups retry for up to 10 seconds before raising.
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Named input_tag (not `input`) so the builtin input() is not shadowed.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('人间失格')
        input_tag.send_keys(Keys.ENTER)

        get_good(driver)
        print('商品信息写入完成')
    finally:
        # quit() ends the WebDriver session and stops the driver process;
        # close() would only close the current window.
        driver.quit()