import os
import re

import requests

# Browser-like headers used for listing-page and album-page requests.
# NOTE(review): the cookie and if-modified-since values are snapshots from a
# real browser session and will expire; the site may reject requests without
# fresh ones -- confirm before relying on this.
PAGE_HEADERS = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'cache-control': 'max-age=0',
    'cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1571725635,1571743086; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1571743406',
    'if-modified-since': 'Sun, 20 Oct 2019 15:13:50 GMT',
    'referer': 'https://www.mzitu.com/page/2/',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
}

# TODO(review): this pattern is the empty string in the original source
# (the real pattern was apparently lost).  re.findall('', html) matches at
# every character position and yields only empty strings, so the crawl never
# extracts a real album URL.  Fill in the actual regex for album links on
# the listing page before using this script.
ALBUM_PATTERN = ''

# Regex extracting the image URL from an album detail page; compiled once
# instead of re-parsing the pattern on every page fetch.
IMAGE_PATTERN = re.compile(r'img\ssrc="(.+?)"\sa')

DOWNLOAD_DIR = 'download'


def _image_headers(referer):
    """Return headers for fetching an image; the site validates Referer."""
    return {
        'Referer': referer,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }


def download_image(image_url, referer, dest_dir=DOWNLOAD_DIR):
    """Download one image into *dest_dir*, named by its URL basename.

    Raises requests.RequestException on network failure and OSError on
    file-system failure; callers decide how to handle those.
    """
    content = requests.get(image_url, headers=_image_headers(referer)).content
    print(content)
    filename = os.path.join(dest_dir, image_url.split('/')[-1])
    with open(filename, 'wb') as pic:
        pic.write(content)


def scrape_album(album_url, pages=range(1, 10)):
    """Fetch each page of one album and download the image it shows.

    *pages* mirrors the original hard-coded range(1, 10) but is now a
    parameter so callers can crawl deeper albums.
    """
    for page_no in pages:
        page_url = album_url + '/%s' % (page_no,)
        html = requests.get(page_url, headers=PAGE_HEADERS).text
        matches = IMAGE_PATTERN.findall(html)
        if not matches:
            # Page carries no image tag -- skip it instead of crashing
            # (the original reached this via IndexError + bare except).
            continue
        image_url = matches[0]
        print(image_url, type(image_url))
        try:
            download_image(image_url, album_url)
        except (requests.RequestException, OSError) as exc:
            # Best-effort like the original, but no longer a silent bare
            # except: report the failure and move on to the next page.
            print('failed to download %s: %s' % (image_url, exc))


def main():
    """Crawl listing pages 0-3 and download images from every album found."""
    if not os.path.exists(DOWNLOAD_DIR):
        os.mkdir(DOWNLOAD_DIR)
    # 'listing_page' replaces the original outer 'i', which was shadowed by
    # the inner per-album page loop also named 'i'.
    for listing_page in range(0, 4):
        listing_url = 'https://www.mzitu.com/page/%s/' % (listing_page,)
        html = requests.get(listing_url, headers=PAGE_HEADERS).text
        for album_url in re.findall(ALBUM_PATTERN, html):
            scrape_album(album_url)


if __name__ == '__main__':
    main()