python爬虫实践——爬取京东商品信息
2020-12-13 02:40
标签:sel webdriver exec comm 查找 pen import 滚轮 click 原文地址:https://www.cnblogs.com/lweiser/p/11047871.html
1 '''
2 爬取京东商品信息:
3 请求url:
4 https://www.jd.com/
5 提取商品信息:
6 1.商品详情页
7 2.商品名称
8 3.商品价格
9 4.评价人数
10 5.商品商家
11 '''
12 from selenium import webdriver
13 from selenium.webdriver.common.keys import Keys
14 import time
15
16
def get_good(driver):
    """Scrape every result page of the current search and close the window.

    For each page: scroll down so the item list is populated, read the link,
    name, price and comment count of every ``gl-item`` element, print each
    record and append it to ``jd.txt`` (UTF-8), then click the ``pn-next``
    control to move to the following page.

    The crawl is a loop rather than a recursion so that a long result set
    cannot exhaust the call stack and so that ``driver.close()`` runs exactly
    once, however the crawl ends.

    Args:
        driver: A Selenium WebDriver already showing a search-result page.

    Raises:
        Any Selenium or I/O error other than a missing element inside an item
        or a missing next-page control is propagated after the window has
        been closed.
    """
    # Imported locally so this function does not depend on changes to the
    # file-level import block.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    try:
        # Open the output file once for the whole crawl instead of once per
        # item; the context manager still flushes it if the crawl fails.
        with open('jd.txt', 'a', encoding='utf-8') as out:
            while True:
                # Scroll via JS so that all items on the page get loaded.
                driver.execute_script('window.scrollTo(0,5000);')

                # Give the page time to load the data.
                time.sleep(2)

                for good in driver.find_elements(By.CLASS_NAME, 'gl-item'):
                    try:
                        good_url = good.find_element(
                            By.CSS_SELECTOR, '.p-img a').get_attribute('href')
                        good_name = good.find_element(
                            By.CSS_SELECTOR, '.p-name em'
                        ).text.replace("\n", "--")
                        good_price = good.find_element(
                            By.CLASS_NAME, 'p-price'
                        ).text.replace("\n", ":")
                        good_commit = good.find_element(
                            By.CLASS_NAME, 'p-commit'
                        ).text.replace("\n", " ")
                    except NoSuchElementException:
                        # An entry without the expected sub-elements is
                        # skipped instead of aborting the whole crawl.
                        continue

                    good_content = (
                        f"\n商品链接: {good_url}"
                        f"\n商品名称: {good_name}"
                        f"\n商品价格: {good_price}"
                        f"\n评价人数: {good_commit}"
                        "\n\n\n"
                    )
                    print(good_content)
                    out.write(good_content)

                try:
                    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
                except NoSuchElementException:
                    # No next-page control: this was the last page.
                    break

                # NOTE(review): assumes the site marks an inactive next-page
                # control with a "disabled" class -- confirm against the
                # live page. Without this check the same page would be
                # scraped forever.
                if 'disabled' in (next_tag.get_attribute('class') or '').split():
                    break

                next_tag.click()
                time.sleep(2)

        # Presumably meant to keep the last page visible for a moment before
        # the window is closed.
        time.sleep(10)

    finally:
        driver.close()
74
75
if __name__ == '__main__':
    # Script entry point: ask for a search term, open the JD home page in
    # Chrome, submit the search and hand the result page to get_good().
    from selenium.webdriver.common.by import By

    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)

        # 1. Request the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name into the search box and press Enter.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)

        get_good(driver)
    finally:
        # quit() ends the session and stops the driver service even when a
        # step above fails before get_good() is reached.
        driver.quit()