Selenium 网页抓取得到了错误的行，我哪里做错了？

发布于 2025-02-13 15:17:17 字数 1347 浏览 2 评论 0原文

您好，我正在抓取一个网站，

这是我的代码

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


# Start a Chrome session using a driver binary obtained through webdriver_manager.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://dealt.ae/collections/laptops"
driver.get(search_url)
# Fixed pause after navigation; presumably gives the page time to render.
time.sleep(3)

ALL_ITEMS = []
# Scratch row reused on every iteration: [grade, description, old price, special price].
item = ["", "", "", ""]
all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
# NOTE(review): an XPath that starts with `//` is evaluated from the document root even
# when called on an element, so this lookup is not scoped to all_laptops[0]; a leading
# `.//` would scope it to the container.
grades = all_laptops[0].find_elements(By.XPATH, '//div[@class="product-top"]/span[@class="product-metafild"]')
for i in range(len(grades)):
    item[0] = grades[i].text
    # NOTE(review): each list below is collected independently over the whole container
    # and then indexed with the same i. The rows only line up if every product has
    # exactly one element of each kind; a product lacking one (e.g. no old price) shifts
    # every later entry, which matches the mismatched rows described in the question.
    # The lists are also re-queried on every iteration although they do not change.
    description = all_laptops[0].find_elements(By.CLASS_NAME, 'product-title')
    item[1] = description[i].text
    old_price= all_laptops[0].find_elements(By.CLASS_NAME, 'old-price')
    item[2] = old_price[i].text
    special_price= all_laptops[0].find_elements(By.CLASS_NAME, 'special-price')
    item[3] = special_price[i].text
    print(item)
    # Store a copy because `item` is mutated again on the next iteration.
    ALL_ITEMS.append(item.copy())    
                
for item in ALL_ITEMS:
    print(item)

# Write the collected rows without an index column or header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop.csv', index=False, header=False)

它给出了错误的输出（行）：某个产品的等级和价格被对应到了其他产品上。请告诉我哪里做错了。

谢谢

原文

Hello, I am web scraping a site.

here is my code

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


# Start a Chrome session using a driver binary obtained through webdriver_manager.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://dealt.ae/collections/laptops"
driver.get(search_url)
# Fixed pause after navigation; presumably gives the page time to render.
time.sleep(3)

ALL_ITEMS = []
# Scratch row reused on every iteration: [grade, description, old price, special price].
item = ["", "", "", ""]
all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
# NOTE(review): an XPath that starts with `//` is evaluated from the document root even
# when called on an element, so this lookup is not scoped to all_laptops[0]; a leading
# `.//` would scope it to the container.
grades = all_laptops[0].find_elements(By.XPATH, '//div[@class="product-top"]/span[@class="product-metafild"]')
for i in range(len(grades)):
    item[0] = grades[i].text
    # NOTE(review): each list below is collected independently over the whole container
    # and then indexed with the same i. The rows only line up if every product has
    # exactly one element of each kind; a product lacking one (e.g. no old price) shifts
    # every later entry, which matches the mismatched rows described in the question.
    # The lists are also re-queried on every iteration although they do not change.
    description = all_laptops[0].find_elements(By.CLASS_NAME, 'product-title')
    item[1] = description[i].text
    old_price= all_laptops[0].find_elements(By.CLASS_NAME, 'old-price')
    item[2] = old_price[i].text
    special_price= all_laptops[0].find_elements(By.CLASS_NAME, 'special-price')
    item[3] = special_price[i].text
    print(item)
    # Store a copy because `item` is mutated again on the next iteration.
    ALL_ITEMS.append(item.copy())    
                
for item in ALL_ITEMS:
    print(item)

# Write the collected rows without an index column or header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop.csv', index=False, header=False)

It is giving me wrong output (rows): the grade and price of one product end up on another product's row. Please tell me what I did wrong.

Thanks

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

浪荡不羁 2025-02-20 15:17:18

您没有在循环中针对每台笔记本电脑单独提取信息。下面是一种更简洁的做法：

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


def _first_text(container, css_selector):
    """Return the stripped text of the first match inside ``container``.

    Uses ``find_elements`` (plural) so that a missing element yields an empty
    string instead of raising ``NoSuchElementException``.
    """
    matches = container.find_elements(By.CSS_SELECTOR, css_selector)
    return matches[0].text.strip() if matches else ''


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
search_url = "https://dealt.ae/collections/laptops"
all_items = []

try:
    driver.get(search_url)
    # Fixed pause after navigation; presumably gives the page time to render.
    time.sleep(3)

    # Find one container element per laptop.
    all_laptops = driver.find_elements(By.CSS_SELECTOR, '.grid-item.col-6.col-md-4.col-lg-3')

    for row in all_laptops:
        # Every lookup is scoped to this laptop's own container, so the four
        # fields of a row always belong to the same product.
        all_items.append({
            'Grade': _first_text(row, 'span.product-metafild'),
            'Description': _first_text(row, '.product-title'),
            'Old Price': _first_text(row, '.old-price'),
            'Special Price': _first_text(row, '.special-price'),
        })
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

# header=False drops the dictionary keys, so the CSV contains only data rows.
my_df = pd.DataFrame(all_items)
my_df.to_csv('laptop.csv', index=False, header=False)

You're not pulling the information for each laptop within the loop. Here's a cleaner way of doing it:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


def _first_text(container, css_selector):
    """Return the stripped text of the first match inside ``container``.

    Uses ``find_elements`` (plural) so that a missing element yields an empty
    string instead of raising ``NoSuchElementException``.
    """
    matches = container.find_elements(By.CSS_SELECTOR, css_selector)
    return matches[0].text.strip() if matches else ''


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
search_url = "https://dealt.ae/collections/laptops"
all_items = []

try:
    driver.get(search_url)
    # Fixed pause after navigation; presumably gives the page time to render.
    time.sleep(3)

    # Find one container element per laptop.
    all_laptops = driver.find_elements(By.CSS_SELECTOR, '.grid-item.col-6.col-md-4.col-lg-3')

    for row in all_laptops:
        # Every lookup is scoped to this laptop's own container, so the four
        # fields of a row always belong to the same product.
        all_items.append({
            'Grade': _first_text(row, 'span.product-metafild'),
            'Description': _first_text(row, '.product-title'),
            'Old Price': _first_text(row, '.old-price'),
            'Special Price': _first_text(row, '.special-price'),
        })
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

# header=False drops the dictionary keys, so the CSV contains only data rows.
my_df = pd.DataFrame(all_items)
my_df.to_csv('laptop.csv', index=False, header=False)

回复收藏 0 原文

瞳孔里扚悲伤 2025-02-20 15:17:18

只需使用 API URL，以静态方式抓取所需的数据，这样还可以轻松实现分页

import requests
from bs4 import BeautifulSoup
import pandas as pd
# Listing page to fetch; the page number is carried in the `page` query parameter.
url = "https://dealt.ae/collections/laptops?page=1"

# A timeout keeps the script from hanging indefinitely on an unresponsive server.
req = requests.get(url, timeout=30)
print(req)
# Fail loudly on an HTTP error instead of parsing an error page.
req.raise_for_status()
soup = BeautifulSoup(req.content, "lxml")

# NOTE(review): the [2::2] slice drops the first two matches and then keeps every
# other one; presumably the page repeats each element and starts with non-listing
# entries -- verify against the live markup.
description = [x.get_text().strip() for x in soup.select('.product-title span')][2::2]

old_price = [x.get_text().strip() for x in soup.select('.old-price')][2::2]

special_price = [x.get_text().strip() for x in soup.select('.special-price')][2::2]

# NOTE(review): the three lists are collected independently and zip() stops at the
# shortest one, so a product lacking one of the prices would shift later rows.
df = pd.DataFrame(data=list(zip(description, old_price, special_price)))
print(df)

输出：

BLACK and GOLD PARTY DECORATIONS Perfect Adult...  Dhs. 1,675.00    Dhs. 815.00
1   Lenovo L450 Thinkpad Laptop  - Intel Core i5-5...  Dhs. 1,630.00    Dhs. 760.00
2   Dell Latitude E7470 Laptop - Intel Core I5 6th...  Dhs. 1,700.00    Dhs. 830.00
3   Dell Latitude 3160 11.6 Inch Touchscreen Displ...  Dhs. 2,900.00  Dhs. 1,599.00
4   Lenovo T450 Thinkpad Laptop  - Intel Core i5-4...  Dhs. 2,900.00  Dhs. 1,550.00
5   Hp Elitebook Folio 9480m 14.1" Display Ci5-4th...  Dhs. 2,725.00  Dhs. 1,865.00
6   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 3,314.00  Dhs. 1,465.00
7   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 2,624.00  Dhs. 1,165.00
8   Dell Precision 7510 Laptop - Intel Core I7 6th...  Dhs. 2,509.00  Dhs. 1,115.00
9   Dell Precision 3510  Laptop - Intel Core I5 6t...  Dhs. 2,854.00  Dhs. 1,265.00
10  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
11  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 3,084.00  Dhs. 1,365.00
12  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 2,969.00  Dhs. 1,315.00
13  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 3,084.00  Dhs. 1,365.00
14  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,969.00  Dhs. 1,315.00
15  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,739.00  Dhs. 1,215.00
16  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,624.00  Dhs. 1,165.00
17  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
18  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,325.00  Dhs. 1,035.00
19  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,210.00    Dhs. 985.00
20  Dell Latitude E7470 Laptop - Intel Core I7 6th...  Dhs. 2,049.00    Dhs. 915.00

Just use the API URL and scrape the desired data the static way; this also makes pagination easy.

import requests
from bs4 import BeautifulSoup
import pandas as pd
# Listing page to fetch; the page number is carried in the `page` query parameter.
url = "https://dealt.ae/collections/laptops?page=1"

# A timeout keeps the script from hanging indefinitely on an unresponsive server.
req = requests.get(url, timeout=30)
print(req)
# Fail loudly on an HTTP error instead of parsing an error page.
req.raise_for_status()
soup = BeautifulSoup(req.content, "lxml")

# NOTE(review): the [2::2] slice drops the first two matches and then keeps every
# other one; presumably the page repeats each element and starts with non-listing
# entries -- verify against the live markup.
description = [x.get_text().strip() for x in soup.select('.product-title span')][2::2]

old_price = [x.get_text().strip() for x in soup.select('.old-price')][2::2]

special_price = [x.get_text().strip() for x in soup.select('.special-price')][2::2]

# NOTE(review): the three lists are collected independently and zip() stops at the
# shortest one, so a product lacking one of the prices would shift later rows.
df = pd.DataFrame(data=list(zip(description, old_price, special_price)))
print(df)

Output:

BLACK and GOLD PARTY DECORATIONS Perfect Adult...  Dhs. 1,675.00    Dhs. 815.00
1   Lenovo L450 Thinkpad Laptop  - Intel Core i5-5...  Dhs. 1,630.00    Dhs. 760.00
2   Dell Latitude E7470 Laptop - Intel Core I5 6th...  Dhs. 1,700.00    Dhs. 830.00
3   Dell Latitude 3160 11.6 Inch Touchscreen Displ...  Dhs. 2,900.00  Dhs. 1,599.00
4   Lenovo T450 Thinkpad Laptop  - Intel Core i5-4...  Dhs. 2,900.00  Dhs. 1,550.00
5   Hp Elitebook Folio 9480m 14.1" Display Ci5-4th...  Dhs. 2,725.00  Dhs. 1,865.00
6   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 3,314.00  Dhs. 1,465.00
7   Macbook Air A1466 (2017) Laptop With 13.3-Inch...  Dhs. 2,624.00  Dhs. 1,165.00
8   Dell Precision 7510 Laptop - Intel Core I7 6th...  Dhs. 2,509.00  Dhs. 1,115.00
9   Dell Precision 3510  Laptop - Intel Core I5 6t...  Dhs. 2,854.00  Dhs. 1,265.00
10  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
11  Dell Latitude E7490 Laptop - Intel Core I5 7th...  Dhs. 3,084.00  Dhs. 1,365.00
12  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 2,969.00  Dhs. 1,315.00
13  Dell Latitude E7480 Touch Screen Laptop - Inte...  Dhs. 3,084.00  Dhs. 1,365.00
14  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,969.00  Dhs. 1,315.00
15  Dell Latitude E7480 Laptop - Intel Core I7 6th...  Dhs. 2,739.00  Dhs. 1,215.00
16  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,624.00  Dhs. 1,165.00
17  Dell Latitude E7480 Laptop - Intel Core I5 7th...  Dhs. 2,739.00  Dhs. 1,215.00
18  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,325.00  Dhs. 1,035.00
19  Dell Latitude E7480 Laptop - Intel Core I5 6th...  Dhs. 2,210.00    Dhs. 985.00
20  Dell Latitude E7470 Laptop - Intel Core I7 6th...  Dhs. 2,049.00    Dhs. 915.00

回复收藏 0 原文

埋情葬爱 2025-02-20 15:17:17

这是因为您没有选对类名（class name）：应当选择所有笔记本电脑共有、并且包含该笔记本电脑全部详细信息的那个元素的类名。

这是完整的工作代码

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Change the `page` query parameter to fetch other pages (per the original
# author: up to 14 pages at 24 items per page).
search_url = "https://dealt.ae/collections/laptops?page=1"
ALL_ITEMS = []

try:
    driver.get(search_url)
    # Fixed pause after navigation; presumably gives the page time to render.
    time.sleep(3)

    # Locate the product grid once; the page is not navigated inside the loop,
    # so there is no need to query it again on every iteration.
    all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
    # An absent grid yields zero rows instead of an IndexError.
    all_grids = all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item') if all_laptops else []
    print(f"Getting {len(all_grids)} laptops")

    for grid in all_grids:
        # Every lookup is scoped to this product's own grid cell, so the fields
        # of a row always belong to the same laptop.
        grade = grid.find_element(By.CLASS_NAME, 'product-metafild').text
        description = grid.find_element(By.CLASS_NAME, 'product-title').text
        try:
            old_price = grid.find_element(By.CLASS_NAME, 'old-price').text
            special_price = grid.find_element(By.CLASS_NAME, 'special-price').text
        except NoSuchElementException:
            # No old/special price pair: fall back to the regular price and
            # leave the special-price column empty.
            old_price = grid.find_element(By.CLASS_NAME, 'price-regular').text
            special_price = ""
        item = [grade, description, old_price, special_price]
        print(item)
        ALL_ITEMS.append(item)
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

for item in ALL_ITEMS:
    print(item)

# Write the collected rows without an index column or header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop_13.csv', index=False, header=False)

This is because you are not selecting the right class name: the one that is common to every laptop and whose element contains all the details of that particular laptop.

Here is the full working code

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Change the `page` query parameter to fetch other pages (per the original
# author: up to 14 pages at 24 items per page).
search_url = "https://dealt.ae/collections/laptops?page=1"
ALL_ITEMS = []

try:
    driver.get(search_url)
    # Fixed pause after navigation; presumably gives the page time to render.
    time.sleep(3)

    # Locate the product grid once; the page is not navigated inside the loop,
    # so there is no need to query it again on every iteration.
    all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
    # An absent grid yields zero rows instead of an IndexError.
    all_grids = all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item') if all_laptops else []
    print(f"Getting {len(all_grids)} laptops")

    for grid in all_grids:
        # Every lookup is scoped to this product's own grid cell, so the fields
        # of a row always belong to the same laptop.
        grade = grid.find_element(By.CLASS_NAME, 'product-metafild').text
        description = grid.find_element(By.CLASS_NAME, 'product-title').text
        try:
            old_price = grid.find_element(By.CLASS_NAME, 'old-price').text
            special_price = grid.find_element(By.CLASS_NAME, 'special-price').text
        except NoSuchElementException:
            # No old/special price pair: fall back to the regular price and
            # leave the special-price column empty.
            old_price = grid.find_element(By.CLASS_NAME, 'price-regular').text
            special_price = ""
        item = [grade, description, old_price, special_price]
        print(item)
        ALL_ITEMS.append(item)
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

for item in ALL_ITEMS:
    print(item)

# Write the collected rows without an index column or header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop_13.csv', index=False, header=False)

回复收藏 0 原文

~没有更多了~