1. Get URL parameters
from urllib import parse
url = 'https://docs.python.org/3.5/search.html?q=parse&check_keywords=yes&area=default'
# urlparse splits the URL into its six components.
parse_result = parse.urlparse(url)
print(parse_result)
# ParseResult(scheme='https', netloc='docs.python.org', path='/3.5/search.html', params='', query='q=parse&check_keywords=yes&area=default', fragment='')
# parse_qs maps every parameter name to a *list* of values, since a name may repeat.
param_dict = parse.parse_qs(parse_result.query)
print(param_dict)
# {'q': ['parse'], 'check_keywords': ['yes'], 'area': ['default']}
q = param_dict['q'][0]
print(q)
# parse
# Note: parse_qs decodes '+' as a space, which is sometimes not what we want.
d = parse.parse_qs('proxy=183.222.102.178:8080&task=XXXXX|5-3+2')
print(d)
# {'proxy': ['183.222.102.178:8080'], 'task': ['XXXXX|5-3 2']}
2. urlencode
from urllib import parse
# urlencode serializes a mapping into a "key=value&key=value" query string.
query = dict(name="walker", age=99)
d = parse.urlencode(query)
print(d)
# name=walker&age=99
3. quote/quote_plus
from urllib import parse
# quote() treats '/' as a safe character by default, so the slash is left as-is.
d = parse.quote('a&b/c', safe='/')
print(d)
# a%26b/c
# quote_plus() has no safe characters by default, so the slash is encoded too.
d1 = parse.quote_plus('a&b/c', safe='')
print(d1)
# a%26b%2Fc
4. unquote/unquote_plus
from urllib import parse
encoded = '1+2'
# unquote() leaves the plus sign untouched.
d = parse.unquote(encoded)
print(d)
# 1+2
# unquote_plus() additionally turns the plus sign into a space.
d1 = parse.unquote_plus(encoded)
print(d1)
# 1 2
5. Convert the query parameters of a URL into a dict
from urllib import parse
def qs(url):
    """Return the query parameters of *url* as a flat ``{name: value}`` dict.

    ``parse_qs`` maps each parameter name to a list of values; only the first
    value of each name is kept. Parameters with a blank value are omitted,
    because ``parse_qs`` drops them by default.
    """
    query = parse.urlparse(url).query
    return {k: v[0] for k, v in parse.parse_qs(query).items()}
print(qs('http://url/api?param=2&param2=4'))
# {'param': '2', 'param2': '4'}
6. URL parameter splicing
from urllib import parse
def url_add_params(url, **params):
    """Return *url* with *params* merged into its query string.

    Existing query parameters are kept; a keyword argument with the same
    name as an existing parameter overrides its value. If a name occurs
    several times in the original query, only its last value survives,
    because the query is collapsed into a dict. All other URL components
    (scheme, host, path, fragment, ...) are left unchanged.
    """
    pr = parse.urlparse(url)
    # keep_blank_values=True so that existing parameters such as "?flag="
    # are not silently dropped when the query string is rebuilt.
    query = dict(parse.parse_qsl(pr.query, keep_blank_values=True))
    query.update(params)
    # ParseResult is a named tuple: swap in the new query and re-assemble.
    return pr._replace(query=parse.urlencode(query)).geturl()
if __name__ == "__main__":
    url = 'http://bbs.163.com/viewthread.php'
    data = {"name": "hero", "111": "222"}
    print(url_add_params(url, **data))
    # result : http://bbs.163.com/viewthread.php?name=hero&111=222
    # (on Python 3.7+ the parameters appear in dict insertion order)
7. Other functions
urljoin
from urllib import parse
base = 'http://www.oschina.com/tieba'
# Without a trailing slash, the last path segment of the base is replaced.
d = parse.urljoin(base, 'index.php')
print(d)
# http://www.oschina.com/index.php
# With a trailing slash, the relative URL is appended below the base path.
d1 = parse.urljoin(base + '/', 'index.php')
print(d1)
# http://www.oschina.com/tieba/index.php
urlsplit
urlsplit is similar to urlparse, but it does not split the params component out of the path. Use it for URLs that follow the RFC 2396 syntax, which allows parameters on every path segment. It returns a named tuple with only five elements: (scheme, netloc, path, query, fragment).
from urllib import parse
address = 'http://www.baidu.com/index.php?username=guol'
# The result has five fields; there is no separate "params" component.
url = parse.urlsplit(address)
print(url)
# SplitResult(scheme='http', netloc='www.baidu.com', path='/index.php', query='username=guol', fragment='')
urlunsplit
urlunsplit combines the parts produced by urlsplit back into a URL. The argument must be an iterable of exactly five elements; a SplitResult returned by urlsplit can also be passed directly.
from urllib import parse
# Order of the five parts: scheme, netloc, path, query, fragment.
parts = ("https", "i.cnblogs.com", "EditPosts.aspx", "a=a", "b=b")
d = parse.urlunsplit(parts)
print(d)
# https://i.cnblogs.com/EditPosts.aspx?a=a#b=b
urlparse
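urlparse splits a URL into six components. The network location is only recognized when it is introduced by '//' (for example 'http://...'); otherwise the input is treated as a relative URL and the host name ends up in the path.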
from urllib import parse
address = "https://i.cnblogs.com/EditPosts.aspx?opt=1"
# Six fields are returned: scheme, netloc, path, params, query, fragment.
d = parse.urlparse(address)
print(d)
# ParseResult(scheme='https', netloc='i.cnblogs.com', path='/EditPosts.aspx', params='', query='opt=1', fragment='')
urlunparse
urlunparse builds a URL from six components in the format used by urlparse; the ParseResult returned by urlparse can be passed to it directly.
from urllib import parse
data = parse.urlparse("https://i.cnblogs.com:80/EditPosts.aspx?opt=1")
# Feeding the parsed result straight back in reproduces the original URL.
rebuilt = parse.urlunparse(data)
print(rebuilt)
# https://i.cnblogs.com:80/EditPosts.aspx?opt=1