我是懒虫 发表于 2021-3-9 17:16:27

网站翻页失败

爬取 https://www.turners.co.nz/Cars/Used-Cars-for-Sale 网站上的图片数据时,翻页出现问题:无论请求第几页,都只返回第一页的结果。代码如下:
# -*- coding:utf8 -*-
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36'
    ),
}
def RequestWithPageno(pageno=1):
    """Fetch one page of the listing and print the first result's good number.

    Requests the module-level ``url`` with the paging arguments in the query
    string, parses the returned HTML and prints the ``data-goodnumber``
    attribute of the first child ``div`` of ``#searchResultsContainer``.
    Nothing is printed when the container has no such child.

    Args:
        pageno: Page number sent as the ``pageno`` query argument.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If no reply arrives within 30 seconds.
    """
    query = {
        'sortorder': 0,
        'pagesize': 24,
        'pageno': pageno,
    }
    # A GET request carries its arguments in the query string, so they must
    # be passed through ``params``; ``data`` would place them in a request
    # body instead and the URL would stay the same for every page.
    # NOTE(review): if every page still yields the same result, the site
    # presumably loads its listing through a separate endpoint -- confirm in
    # the browser's network panel.
    response = requests.get(url, headers=headers, params=query, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Showing the first vehicle's information is enough.
    first = soup.select_one('#searchResultsContainer > div')
    if first is not None:
        print(first.get("data-goodnumber"))
# Listing page being scraped; the paging arguments are supplied per request
# by RequestWithPageno rather than being baked into this address.
url = 'https://www.turners.co.nz/Cars/Used-Cars-for-Sale'

if __name__ == "__main__":
    # Request page after page, echoing each page number before its fetch.
    # There is no stop condition: the loop runs until it is interrupted or a
    # request raises.
    pageno = 0
    while True:
        pageno += 1
        print(pageno)
        RequestWithPageno(pageno=pageno)

现实星空 发表于 2021-3-13 00:48:36

其实请求的不是 https://www.turners.co.nz/Cars/Used-Cars-for-Sale/?sortorder=0&pagesize=24&pageno=2 这个地址,实际上是 https://www.turners.co.nz/Client/car/SearchList 这个地址。
POST 提交的 data 如下:
{
    "pageno": 3,
    "filters": {
      "sortorder": "0",
      "pagesize": "24",
      "pageno": 3,
      "searchfor": null,
      "trans": null,
      "drivetype": null,
      "featurestechnology": null,
      "featuresinterior": null,
      "featuresperformance": null,
      "featuresother": null,
      "financefrom": null,
      "financeto": null,
      "fuelefficiencyrating": null,
      "safetyrating": null,
      "hasonroadcosts": null,
      "isnznew": null,
      "hasnoreserve": null,
      "hascga": null,
      "isdiscounted": null,
      "iscertified": null,
      "fuels": null,
      "seats": null,
      "colours": null,
      "types": null,
      "bodystyles": null,
      "make": null,
      "models": null,
      "locations": null,
      "industry": null,
      "category": null,
      "subcategories": null,
      "yearfrom": null,
      "yearto": null,
      "odofrom": null,
      "odoto": null,
      "pricefrom": null,
      "priceto": null,
      "enginefrom": null,
      "engineto": null,
      "salemethods": null,
      "regstatus": null,
      "custom": null
    }
}

茬兀呰秂 发表于 2021-4-4 01:00:47

# -*- coding:utf8 -*-
import requests
from bs4 import BeautifulSoup
import json
import time
# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36'
    ),
}
def _iter_json_strings(node):
    """Yield every string value found anywhere inside decoded JSON *node*."""
    if isinstance(node, str):
        yield node
    elif isinstance(node, dict):
        for value in node.values():
            yield from _iter_json_strings(value)
    elif isinstance(node, list):
        for item in node:
            yield from _iter_json_strings(item)


def RequestWithPageno(pageno=1):
    """Fetch one page of search results and print the first car's link and title.

    Posts the search filters to the ``SearchList`` endpoint, prints the
    ``totalResults`` / ``startIndex`` / ``endIndex`` counters from the JSON
    reply, then prints the ``href`` and the image ``alt`` text of the first
    ``div.car-summary-images`` found in the reply's markup. The link and
    title are skipped when no such ``div`` is present.

    Args:
        pageno: Page number to request; it is sent both at the top level of
            the payload and inside ``filters``.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If no reply arrives within 30 seconds.
    """
    form_data = {
        "pageno": pageno,
        "filters": {
            "sortorder": "0",
            "pagesize": "24",
            "pageno": pageno,
        },
    }
    print(form_data)
    url = 'https://www.turners.co.nz/Client/car/SearchList'
    # The payload contains a nested object, so it has to be sent as a JSON
    # body. ``data=`` would form-encode it and reduce ``filters`` to a list
    # of its key names, losing the values.
    response = requests.post(url=url, headers=headers, json=form_data,
                             timeout=30)
    response.raise_for_status()
    jsondata = response.json()
    print('totalResults:'+str(jsondata['totalResults']))
    print('startIndex:'+str(jsondata['startIndex']))
    print('endIndex:'+str(jsondata['endIndex']))
    # Parse the already-decoded JSON strings rather than un-escaping the raw
    # response text: an encode/``unicode_escape`` round trip corrupts any
    # non-ASCII character and feeds the JSON envelope itself to the parser.
    # NOTE(review): which key carries the rendered HTML is not visible here,
    # so every string value of the reply is parsed -- narrow this to the
    # actual key once confirmed.
    markup = '\n'.join(_iter_json_strings(jsondata))
    soup = BeautifulSoup(markup, 'html.parser')
    # Showing the first vehicle's information is enough.
    first = soup.find('div', class_='car-summary-images')
    if first is not None:
        print(first.a['href'])
        print(first.a.img['alt'])
# url = 'https://www.turners.co.nz/Cars/Used-Cars-for-Sale/?sortorder=0&pagesize=24&pageno=1'
if __name__ == "__main__":
    # Start at page 5 and keep going, pausing a minute after every request.
    # The counter is bumped at the top of the loop, hence the initial 4.
    # There is no stop condition: the loop runs until it is interrupted or a
    # request raises.
    pageno = 4
    while True:
        pageno += 1
        RequestWithPageno(pageno=pageno)
        time.sleep(60)
页: [1]
查看完整版本: 网站翻页失败