1 urllib库(了解)
2021-02-09 08:14
标签:网页 下载 net 数据 用法 readlines eve ror retrieve

目录:
1、urllib库之urlopen函数
2、urllib库之urlretrieve下载数据
3、urllib库之解码与编码
4、urllib库之urlparse

原文地址:https://www.cnblogs.com/sruzzg/p/13062881.html

urllib 是 Python 的基本库之一,内置四大模块,即 request、error、parse、robotparser。其中常用的是 request 和 error:前者用于发送 HTTP 请求,后者用于处理请求过程中出现的错误。parse 用于对 URL 进行处理,例如拆分、合并等。

"""urllib库之urlopen函数"""


# An equivalent import style is ``from urllib import request``.
import urllib.request

# Fetch the page with urlopen. The context manager closes the underlying
# connection once the body has been read.
with urllib.request.urlopen('http://www.baidu.com/') as resp:
    # resp.read() returns the raw (encoded) bytes, so decode them to text.
    print(resp.read().decode('utf-8'))
"""urllib: downloading data with urlretrieve."""


import urllib.request

# Download the resource at the URL and save it to a local file.
urllib.request.urlretrieve('http://www.baidu.com', 'baidu.html')

# Binary resources such as images are downloaded the same way.
urllib.request.urlretrieve(
    'http://img01.tooopen.com/Downs/images/2011/10/30/sy_20111030205827520061.jpg',
    'image.jpg',
)
"""urllib: URL encoding and decoding."""


import urllib.parse
import urllib.request

# urlencode turns a mapping into a percent-encoded query string; non-ASCII
# keys and values are encoded as UTF-8 and non-string values are stringified.
params = {'name': '张三', 'age': 18, '年级': '一年级'}
result = urllib.parse.urlencode(params)
# name=%E5%BC%A0%E4%B8%89&age=18&%E5%B9%B4%E7%BA%A7=%E4%B8%80%E5%B9%B4%E7%BA%A7
print(result)

# Build the search URL by percent-encoding the query with urlencode rather
# than embedding the raw text (i.e. instead of 'https://www.baidu.com/s?wd=刘德华').
url = 'https://www.baidu.com/s?'
param = {'wd': '刘德华'}
qs = urllib.parse.urlencode(param)
url += qs
print(url)
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url) as resp:
    print(resp.read().decode('utf-8'))

params = {'name': '张三', 'age': 18, '年级': '一年级'}
# Encode the mapping into a query string.
result = urllib.parse.urlencode(params)
# name=%E5%BC%A0%E4%B8%89&age=18&%E5%B9%B4%E7%BA%A7=%E4%B8%80%E5%B9%B4%E7%BA%A7
print(result)
# Decode it again. parse_qs returns every value as a list of strings, so the
# integer 18 comes back as ['18'].
result2 = urllib.parse.parse_qs(result)
# {'name': ['张三'], 'age': ['18'], '年级': ['一年级']}
print(result2)

params2 = "张三李四"
# quote percent-encodes a single string (UTF-8 by default).
rs = urllib.parse.quote(params2)
# Interpolate the value into the message; passing it as a second argument to
# print would emit the literal "%s" placeholder instead.
print('rs=%s' % rs)
# unquote reverses the percent-encoding.
rs2 = urllib.parse.unquote(rs)
print('rs2=%s' % rs2)
"""urllib: splitting and rebuilding URLs with urlparse."""


import urllib.parse

url = 'http://www.google.com/search;hello?hl=en&q=urlparse&btnG=Google+Search#1'
# urlparse splits a URL into six components, including the rarely used
# ";params" part of the path.
result = urllib.parse.urlparse(url)
print('result = {}'.format(result))
print(result.scheme)    # http
print(result.netloc)    # www.google.com
print(result.path)      # /search
print(result.params)    # hello (rarely used)
print(result.query)     # hl=en&q=urlparse&btnG=Google+Search
print(result.fragment)  # 1
# urlunparse is the inverse: it reassembles the six components into a URL.
result2 = urllib.parse.urlunparse(result)
print('result2 = {}'.format(result2))

url2 = 'http://www.google.com/search;hello?hl=en&q=urlparse&btnG=Google+Search#1'
# urlsplit is like urlparse but yields five components: it does not separate
# the ";params" part, which therefore stays inside the path.
result3 = urllib.parse.urlsplit(url2)
print('result3 = {}'.format(result3))
print(result3.scheme)    # http
print(result3.netloc)    # www.google.com
print(result3.path)      # /search;hello
print(result3.query)     # hl=en&q=urlparse&btnG=Google+Search
print(result3.fragment)  # 1
# urlunsplit is the inverse of urlsplit.
result4 = urllib.parse.urlunsplit(result3)
print('result4 = {}'.format(result4))

# urljoin resolves its second argument relative to the base URL. Without a
# leading "?", the second argument is treated as a relative *path*, so it
# replaces the last path segment ("search") of the base.
url3 = urllib.parse.urljoin('http://www.google.com/search?', 'hl=en&q=urlparse&btnG=Google+Search#1')
# http://www.google.com/hl=en&q=urlparse&btnG=Google+Search#1
print('url3 = {}'.format(url3))
# The "/" after "?" belongs to the base's query, not its path, so the result
# is the same as for url3.
url4 = urllib.parse.urljoin('http://www.google.com/search?/', 'hl=en&q=urlparse&btnG=Google+Search#1')
# http://www.google.com/hl=en&q=urlparse&btnG=Google+Search#1
print('url4 = {}'.format(url4))
# With a leading "?", the second argument is a query: the base path is kept
# and only the query and fragment are attached.
url5 = urllib.parse.urljoin('http://www.google.com/search', '?hl=en&q=urlparse&btnG=Google+Search#1')
# http://www.google.com/search?hl=en&q=urlparse&btnG=Google+Search#1
print('url5 = {}'.format(url5))
上一篇:5.线性回归算法