解决这些错误如何无法混合str和非STR参数

发布于 2025-02-09 01:56:25 字数 4576 浏览 2 评论 0原文

他们向我展示了这些错误,即无法混合str和非STR参数我正在编写一些刮擦代码,并经历了上述错误。我的代码正在关注。但是,通过CMD运行脚本后。我遇到错误。这些错误表明我无法混合使用STR和非STR参数,并且我对如何解决这个问题感到困惑。任何帮助将不胜感激。

import scrapy
from scrapy.http import Request
from selenium import webdriver


class TestSpider(scrapy.Spider):
    name = 'test'
    start_urls = ['https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx']
    custom_settings = {
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 1,
        'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        }
    
    
    
    def parse(self, response):
        books = response.xpath("//div[@class='list-group']//@href").extract()
        for book in books:
            url = response.urljoin(book)
            if url.endswith('.ro') or url.endswith('.ro/'):
                continue
            yield Request(url, callback=self.parse_book)
            
    def __init__(self):
      self.driver = webdriver.Chrome('C:\Program Files (x86)\chromedriver.exe')
    
    def parse_book(self, response):
        
        title=response.xpath("//span[@id='HeadingContent_lblTitle']//text()").get()
        d1=response.xpath("//div[@class='col-md-10']//p[1]//text()").get()
        d1=d1.strip()
        d2=response.xpath("//div[@class='col-md-10']//p[2]//text()").get()
        d2=d2.strip()
        d3=response.xpath("//div[@class='col-md-10']//p[3]//span//text()").get()
        d3=d3.strip()
        d4=response.xpath("//div[@class='col-md-10']//p[4]//text()").get()
        d4=d4.strip()
        
      
        
      
        yield{
            "title1":title,
            "title2":d1,
            "title3":d2,
            "title4":d3,
            "title5":d4,
        }
        
       
        self.driver.get(response.url)

        while True:
            next = self.driver.find_element_by_xpath("//a[@id='MainContent_PagerTop_NavNext']")
            try:
                next.click()

                # get the data and write it to scrapy items
            except:
                break
            
            
            yield response.follow(next, callback = self.parse)

错误

Traceback (most recent call last):
  File "e:\python39\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 342, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 40, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\Dell\sample\sample\spiders\test.py", line 64, in parse_book
    yield response.follow(next, callback = self.parse)
  File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 169, in follow
    return super().follow(
  File "e:\python39\lib\site-packages\scrapy\http\response\__init__.py", line 155, in follow
    url = self.urljoin(url)
  File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 102, in urljoin
    return urljoin(get_base_url(self), url)
  File "e:\python39\lib\urllib\parse.py", line 532, in urljoin
    base, url, _coerce_result = _coerce_args(base, url)
  File "e:\python39\lib\urllib\parse.py", line 125, in _coerce_args
    raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments

They show me these error that Cannot mix str and non-str argumentsI'm writing some scraping codes and experiencing an error as above. My code is following.However, upon running the script through the cmd. I am getting errors. These errors suggest that I cannot mix str and non-str arguments and I am confused over how to deal with this problem. Any help would be appreciated.

import scrapy
from scrapy.http import Request
from selenium import webdriver


class TestSpider(scrapy.Spider):
    name = 'test'
    start_urls = ['https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx']
    custom_settings = {
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 1,
        'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        }
    
    
    
    def parse(self, response):
        books = response.xpath("//div[@class='list-group']//@href").extract()
        for book in books:
            url = response.urljoin(book)
            if url.endswith('.ro') or url.endswith('.ro/'):
                continue
            yield Request(url, callback=self.parse_book)
            
    def __init__(self):
      self.driver = webdriver.Chrome('C:\Program Files (x86)\chromedriver.exe')
    
    def parse_book(self, response):
        
        title=response.xpath("//span[@id='HeadingContent_lblTitle']//text()").get()
        d1=response.xpath("//div[@class='col-md-10']//p[1]//text()").get()
        d1=d1.strip()
        d2=response.xpath("//div[@class='col-md-10']//p[2]//text()").get()
        d2=d2.strip()
        d3=response.xpath("//div[@class='col-md-10']//p[3]//span//text()").get()
        d3=d3.strip()
        d4=response.xpath("//div[@class='col-md-10']//p[4]//text()").get()
        d4=d4.strip()
        
      
        
      
        yield{
            "title1":title,
            "title2":d1,
            "title3":d2,
            "title4":d3,
            "title5":d4,
        }
        
       
        self.driver.get(response.url)

        while True:
            next = self.driver.find_element_by_xpath("//a[@id='MainContent_PagerTop_NavNext']")
            try:
                next.click()

                # get the data and write it to scrapy items
            except:
                break
            
            
            yield response.follow(next, callback = self.parse)

Error

Traceback (most recent call last):
  File "e:\python39\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
    yield next(it)
  File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
    return next(self.data)
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 342, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 40, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\Dell\sample\sample\spiders\test.py", line 64, in parse_book
    yield response.follow(next, callback = self.parse)
  File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 169, in follow
    return super().follow(
  File "e:\python39\lib\site-packages\scrapy\http\response\__init__.py", line 155, in follow
    url = self.urljoin(url)
  File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 102, in urljoin
    return urljoin(get_base_url(self), url)
  File "e:\python39\lib\urllib\parse.py", line 532, in urljoin
    base, url, _coerce_result = _coerce_args(base, url)
  File "e:\python39\lib\urllib\parse.py", line 125, in _coerce_args
    raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文