Core code:
def ipPools(numPage):
    """Scrape proxy IP/port pairs from xicidaili.com into ``ips.csv``.

    Fetches listing pages 1 through ``numPage`` (inclusive), takes the
    second and third ``<td>`` cell of every ``<tr>`` inside the element
    with id ``ip_list`` and writes them as one CSV row ``[ip, port]``.
    ``ips.csv`` is created (or overwritten) in the current directory.

    Args:
        numPage: Number of listing pages to fetch, starting at page 1.
    """
    headers = randomHeads()
    url = 'http://www.xicidaili.com/nn/'
    # On Python 3 the csv module writes str, so the file must be opened in
    # text mode ('w', not 'wb'); newline='' lets the csv writer control line
    # endings itself. The `with` block guarantees the file is flushed and
    # closed even if a request or parse step raises.
    with open('ips.csv', 'w', newline='', encoding='utf-8') as saveFsvFile:
        writer = csv.writer(saveFsvFile)
        for num in range(1, numPage + 1):
            full_url = url + str(num)
            # Named `response` rather than `re` to avoid shadowing the
            # standard-library `re` module.
            response = requests.get(full_url, headers=headers)
            soup = BeautifulSoup(response.text, 'lxml')
            rows = soup.find(id="ip_list").find_all('tr')
            for item in rows:
                tds = item.find_all('td')
                try:
                    # Keep the values as str: encoding to bytes here would
                    # make the text-mode writer emit "b'...'" reprs.
                    proxyIp = tds[1].text
                    proxyPort = tds[2].text
                except IndexError:
                    # Rows with fewer than three <td> cells (e.g. a header
                    # row) carry no proxy entry; skip them.
                    continue
                writer.writerow([proxyIp, proxyPort])
                print('保存为excel成功!')
Points to note.
Be sure to convert str to bytes :
str.encode("utf-8")
File open mode on Python 3.6:
Change open('ips.csv', 'wb') to open('ips.csv', 'w'). I got an error at exactly this point; if you run into the same error, you can use this as a reference.