网站翻页失败
爬取 https://www.turners.co.nz/Cars/Used-Cars-for-Sale 网站上的图片数据时，翻页出现问题：无论设置多少页，都只返回第一页的结果。代码如下：
# -*- coding:utf8 -*-
import requests
from bs4 import BeautifulSoup
# HTTP headers sent with every request; the User-Agent string is that of
# desktop Chrome 87 on Windows 10.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
def RequestWithPageno(pageno=1):
    """Fetch one page of the listing and print the first vehicle's number.

    Sends a GET request to the module-level ``url`` with the module-level
    ``headers`` and prints the ``data-goodnumber`` attribute of the first
    ``div`` directly under ``#searchResultsContainer``. Nothing is printed
    when the container has no child ``div``.

    Args:
        pageno: Page number to request (the script starts counting at 1).
    """
    query = {
        'sortorder': 0,
        'pagesize': 24,
        'pageno': pageno,
    }
    # A GET request carries its arguments in the query string, so they must be
    # passed as ``params``. Passing them as ``data`` puts them in a request
    # body, and the URL that is actually requested never mentions the page.
    # The resulting URL has the form
    # <url>?sortorder=0&pagesize=24&pageno=<pageno>
    # NOTE(review): if the site fills the result list through a separate
    # background request, this page may still render page 1 - confirm in the
    # browser's network tab.
    response = requests.get(url, headers=headers, params=query, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    divs = soup.select('#searchResultsContainer > div')
    # Showing only the first vehicle is enough to tell the pages apart.
    if divs:
        goodnumber = divs[0].get("data-goodnumber")
        print(goodnumber)
# url = 'https://www.turners.co.nz/Cars/Used-Cars-for-Sale/?sortorder=0&pagesize=24&pageno=1'
# Listing page requested by RequestWithPageno.
url = 'https://www.turners.co.nz/Cars/Used-Cars-for-Sale'

if __name__ == "__main__":
    # Walk the pages in order, starting at page 1. The loop has no exit
    # condition, so the script runs until it is interrupted.
    pageno = 0
    while True:
        pageno += 1
        print(pageno)
        RequestWithPageno(pageno=pageno)
其实请求的不是 https://www.turners.co.nz/Cars/Used-Cars-for-Sale/?sortorder=0&pagesize=24&pageno=2 这个地址，实际上是 https://www.turners.co.nz/Client/car/SearchList 这个地址。
POST 的 data 如下：
{
"pageno": 3,
"filters": {
"sortorder": "0",
"pagesize": "24",
"pageno": 3,
"searchfor": null,
"trans": null,
"drivetype": null,
"featurestechnology": null,
"featuresinterior": null,
"featuresperformance": null,
"featuresother": null,
"financefrom": null,
"financeto": null,
"fuelefficiencyrating": null,
"safetyrating": null,
"hasonroadcosts": null,
"isnznew": null,
"hasnoreserve": null,
"hascga": null,
"isdiscounted": null,
"iscertified": null,
"fuels": null,
"seats": null,
"colours": null,
"types": null,
"bodystyles": null,
"make": null,
"models": null,
"locations": null,
"industry": null,
"category": null,
"subcategories": null,
"yearfrom": null,
"yearto": null,
"odofrom": null,
"odoto": null,
"pricefrom": null,
"priceto": null,
"enginefrom": null,
"engineto": null,
"salemethods": null,
"regstatus": null,
"custom": null
}
}
# -*- coding:utf8 -*-
import requests
from bs4 import BeautifulSoup
import json
import time
# HTTP headers sent with every request; the User-Agent string is that of
# desktop Chrome 87 on Windows 10.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
def RequestWithPageno(pageno=1):
    """POST a search for one result page and print a short summary of it.

    Prints the request payload, then the ``totalResults``, ``startIndex`` and
    ``endIndex`` fields of the JSON response, and finally the link ``href``
    and image ``alt`` text of the first ``div`` with class
    ``car-summary-images`` found in the response body.

    Args:
        pageno: Page number to request; it is sent both at the top level of
            the payload and inside ``filters``.
    """
    form_data = {
        "pageno": pageno,
        "filters": {
            "sortorder": "0",
            "pagesize": "24",
            "pageno": pageno,
        },
    }
    print(form_data)
    url = 'https://www.turners.co.nz/Client/car/SearchList'
    # The payload is nested, so it is sent as a JSON body. With ``data=``
    # requests form-encodes the dict and, for the nested ``filters`` value,
    # transmits only its keys, silently dropping sortorder/pagesize/pageno.
    response = requests.post(url=url, headers=headers, json=form_data, timeout=30)
    jsondata = response.json()
    print('totalResults:'+str(jsondata['totalResults']))
    print('startIndex:'+str(jsondata['startIndex']))
    print('endIndex:'+str(jsondata['endIndex']))
    # The raw response body is run through ``unicode_escape`` so that escaped
    # sequences in the JSON text become literal characters before it is
    # handed to the HTML parser.
    # NOTE(review): this round-trip can garble non-ASCII characters; parsing
    # the HTML-bearing field of ``jsondata`` directly would be cleaner -
    # confirm which field holds the markup.
    text = response.text.encode('utf-8').decode('unicode_escape')
    soup = BeautifulSoup(text, 'html.parser')
    divs = soup.find_all('div', class_='car-summary-images')
    # Showing only the first vehicle is enough to tell the pages apart.
    if divs:
        first = divs[0]
        print(first.a['href'])
        print(first.a.img['alt'])
# url = 'https://www.turners.co.nz/Cars/Used-Cars-for-Sale/?sortorder=0&pagesize=24&pageno=1'
if __name__ == "__main__":
    # Request page after page, beginning with page 5. The loop has no exit
    # condition, so the script runs until it is interrupted.
    pageno = 4
    while True:
        pageno += 1
        RequestWithPageno(pageno=pageno)
        # Pause for a minute between consecutive requests.
        time.sleep(60)
页:
[1]