自动获取代理ip小脚本
一个自动爬取代理ip的脚本分享给大家
import requests
from lxml import etree
import csv
import time
def pc(url):
    """Scrape one proxy listing page and append its ip/port pairs to disk.

    Downloads ``url``, parses the HTML with lxml, and reads the first two
    cells (ip, port) of rows 1-15 of the table under the element whose id
    is ``list``. Each pair is handed to ``write``.

    Args:
        url: Address of the listing page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url=url, headers=header, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    response.raise_for_status()
    selector = etree.HTML(response.content.decode())
    time.sleep(1)
    for i in range(1, 16):
        # Pause kept from the original script so the overall request rate
        # stays the same.
        time.sleep(1)
        ip = selector.xpath('//*[@id="list"]/table/tbody/tr[%d]/td[1]/text()' % i)
        port = selector.xpath('//*[@id="list"]/table/tbody/tr[%d]/td[2]/text()' % i)
        if not ip or not port:
            # Fewer than 15 rows on this page (or the layout differs):
            # stop instead of raising IndexError on an empty result.
            break
        write([ip[0], port[0]])
def write(lis):
    """Append one row to the output file '代理.txt' in CSV format.

    Args:
        lis: Sequence of field values (here ``[ip, port]``) written as a
            single CSV row.
    """
    # newline="" lets the csv module control line endings itself, as the
    # csv documentation recommends for files passed to csv.writer.
    with open('代理.txt', 'a', newline="", encoding='utf-8') as f:
        csw = csv.writer(f)
        csw.writerow(lis)
if __name__ == '__main__':
    # Visit listing pages 1 through 3516 in order, scraping each one.
    for i in range(1, 3517):
        # The '+' concatenation operators were missing in the pasted
        # listing; restored here so the statements parse.
        print("正在打印第" + str(i) + "页ip")
        url = 'https://www.kuaidaili.com/free/inha/' + str(i)
        pc(url)
# Article caption that followed the listing: "运行截图" (screenshot of a run).
注:
本文只提供技术分享，请勿用作其他非法用途。如果造成任何法律后果，与本文作者无关。
张