Scrape Google Maps 报错:invalid argument: 'url' must be a string

发布于 2025-02-04 21:13:38 字数 1806 浏览 2 评论 0原文

运行代码时抛出 selenium.common.exceptions.InvalidArgumentException: Message: invalid argument: 'url' must be a string。程序能抓取到第一个标题,但在抓取第二个标题时就报出"url 必须是字符串"的错误。页面链接示例:https://www.google.com/maps/search/dentist+uk/@31.5688259,74.2388013,12z/data=!3m1!4b1

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Chrome launch options for the scraping session.
options = webdriver.ChromeOptions()

# Headless mode is intentionally left disabled so the browser window is visible.
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")

# webdriver-manager resolves and downloads a chromedriver binary matching the
# installed Chrome, so no manual driver path is needed.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)


def supplyvan_scraper():
    """Scrape dentist titles from a Google Maps search results page.

    Collects the detail-page links from the results list, visits each one,
    and prints the place title. Uses the module-level ``chrome_driver``;
    the ``with`` block closes the browser session on exit.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'https://www.google.com/maps/search/dentist+uk/@31.5688259,74.2388013,12z/data=!3m1!4b1'
        driver.get(url)
        time.sleep(3)  # allow the results list to render

        # BUG FIX: get_attribute('href') returns None for anchors without an
        # href, and driver.get(None) raises InvalidArgumentException
        # ("invalid argument: 'url' must be a string") — the error reported
        # above. Drop falsy hrefs before navigating.
        hrefs = (a.get_attribute('href')
                 for a in driver.find_elements(By.XPATH, "//div[@class='Nv2PK Q2HXcd THOPZb']//a"))
        page_links = [href for href in hrefs if href]

        # Visit every collected link and print its title.
        for link in page_links:
            driver.get(link)
            time.sleep(2)  # let the place page render before reading the title
            title = driver.find_element(By.XPATH, "//h1[@class='DUwDvf fontHeadlineLarge']//span").text
            print(title)
            time.sleep(2)

        time.sleep(2)
        # Redundant with the `with` block's cleanup, but quit() is idempotent.
        driver.quit()


# Script entry point: run the scraper immediately at import/execution time.
supplyvan_scraper()

When I run my code, Selenium raises selenium.common.exceptions.InvalidArgumentException: Message: invalid argument: 'url' must be a string. It scrapes the first title, but when it goes to scrape the second title it raises that error saying the URL must be a string. This is the page link: https://www.google.com/maps/search/uk+dentist/@31.5688259,74.2388013,12z/data=!3m1!4b1

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Browser configuration; headless stays off so the session window is visible.
options = webdriver.ChromeOptions()

# options.add_argument("--headless")
for chrome_flag in (
    "--no-sandbox",
    "--disable-gpu",
    "--window-size=1920x1080",
    "--disable-extensions",
):
    options.add_argument(chrome_flag)

# The chromedriver binary is resolved automatically by webdriver-manager.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)


def supplyvan_scraper():
    """Scrape dentist titles from a Google Maps search results page.

    Collects the detail-page links from the results list, visits each one,
    and prints the place title. Uses the module-level ``chrome_driver``;
    the ``with`` block closes the browser session on exit.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'https://www.google.com/maps/search/dentist+uk/@31.5688259,74.2388013,12z/data=!3m1!4b1'
        driver.get(url)
        time.sleep(3)  # allow the results list to render

        # BUG FIX: get_attribute('href') returns None for anchors without an
        # href, and driver.get(None) raises InvalidArgumentException
        # ("invalid argument: 'url' must be a string") — the error reported
        # above. Drop falsy hrefs before navigating.
        hrefs = (a.get_attribute('href')
                 for a in driver.find_elements(By.XPATH, "//div[@class='Nv2PK Q2HXcd THOPZb']//a"))
        page_links = [href for href in hrefs if href]

        # Visit every collected link and print its title.
        for link in page_links:
            driver.get(link)
            time.sleep(2)  # let the place page render before reading the title
            title = driver.find_element(By.XPATH, "//h1[@class='DUwDvf fontHeadlineLarge']//span").text
            print(title)
            time.sleep(2)

        time.sleep(2)
        # Redundant with the `with` block's cleanup, but quit() is idempotent.
        driver.quit()


# Script entry point: run the scraper immediately at import/execution time.
supplyvan_scraper()

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

幻梦 2025-02-11 21:13:38

就我而言,只是标题元素的选择器有点问题,除此之外一切正常。

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
# Chrome launch options for the scraping session.
options = webdriver.ChromeOptions()

# Headless mode is intentionally left disabled so the browser window is visible.
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")

# webdriver-manager downloads a chromedriver matching the installed Chrome.
chrome_driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
# Collected place titles; filled by supplyvan_scraper(), read into a DataFrame below.
data=[]
def supplyvan_scraper():
    """Scrape dentist titles from Google Maps and append them to ``data``.

    Loads the search results page, collects every result link, visits each
    link in turn, and appends the place title to the module-level ``data``
    list. Uses the module-level ``chrome_driver`` session.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'https://www.google.com/maps/search/dentist+uk/@31.5688259,74.2388013,12z/data=!3m1!4b1'
        driver.get(url)
        time.sleep(3)  # allow the results list to render

        # ROBUSTNESS: get_attribute('href') can return None, and
        # driver.get(None) raises InvalidArgumentException
        # ("'url' must be a string") — drop missing hrefs before navigating.
        hrefs = (a.get_attribute('href')
                 for a in driver.find_elements(By.XPATH, '//*[@class="hfpxzc"]'))
        page_links = [href for href in hrefs if href]

        for link in page_links:
            print(link)
            driver.get(link)
            time.sleep(2)  # let the place page render before reading the title
            title = driver.find_element(
                By.XPATH, '//h1[@class="DUwDvf fontHeadlineLarge"]/span[1]').text
            data.append(title)
            time.sleep(2)


# Run the scraper; it fills the module-level `data` list with place titles.
supplyvan_scraper()

# Wrap the collected titles in a single-column DataFrame for display.
df = pd.DataFrame(data,columns=['title'])

print(df)

输出:

                                  title
0                          YOR Dental at MediaCityUK
1               Blossom Dental Care & Implant Studio
2                         Blackbrook Dental Practice
3                          Greenwich Dental Practice
4                                        NHS Dentist
5                               London Dental Centre
6                          New Cross Dental Practice
7                                       Dental Works
8                             Huntingdon Dental Care
9  Advance Dental Care - Private & NHS | Invisali...

In my case, it only had a little trouble with the title element selector; apart from that, everything works fine.

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
# Browser configuration; headless stays off so the session window is visible.
options = webdriver.ChromeOptions()

# options.add_argument("--headless")
for chrome_flag in (
    "--no-sandbox",
    "--disable-gpu",
    "--window-size=1920x1080",
    "--disable-extensions",
):
    options.add_argument(chrome_flag)

# The chromedriver binary is resolved automatically by webdriver-manager.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
# Accumulates scraped place titles for the DataFrame built after the run.
data = []
def supplyvan_scraper():
    """Collect place titles from a Google Maps dentist search into ``data``.

    Opens the search results page, gathers every result anchor's href,
    then navigates to each place page and appends its heading text to the
    module-level ``data`` list.
    """
    with chrome_driver as browser:
        browser.implicitly_wait(15)
        search_url = ('https://www.google.com/maps/search/dentist+uk/'
                      '@31.5688259,74.2388013,12z/data=!3m1!4b1')
        browser.get(search_url)
        time.sleep(3)

        # Gather the detail-page links from the results list up front.
        result_anchors = browser.find_elements(By.XPATH, '//*[@class="hfpxzc"]')
        page_links = []
        for anchor in result_anchors:
            page_links.append(anchor.get_attribute('href'))

        # Visit each place page and record its heading.
        for link in page_links:
            print(link)
            browser.get(link)
            time.sleep(2)
            heading = browser.find_element(
                By.XPATH, '//h1[@class="DUwDvf fontHeadlineLarge"]/span[1]')
            data.append(heading.text)
            time.sleep(2)


# Run the scraper; it fills the module-level `data` list with place titles.
supplyvan_scraper()

# Wrap the collected titles in a single-column DataFrame for display.
df = pd.DataFrame(data,columns=['title'])

print(df)

Output:

                                  title
0                          YOR Dental at MediaCityUK
1               Blossom Dental Care & Implant Studio
2                         Blackbrook Dental Practice
3                          Greenwich Dental Practice
4                                        NHS Dentist
5                               London Dental Centre
6                          New Cross Dental Practice
7                                       Dental Works
8                             Huntingdon Dental Care
9  Advance Dental Care - Private & NHS | Invisali...
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文