解决这些错误如何无法混合str和非STR参数
他们向我展示了这些错误,即无法混合str和非STR参数
我正在编写一些刮擦代码,并经历了上述错误。我的代码正在关注。但是,通过CMD运行脚本后。我遇到错误。这些错误表明我无法混合使用STR和非STR参数,并且我对如何解决这个问题感到困惑。任何帮助将不胜感激。
import scrapy
from scrapy.http import Request
from selenium import webdriver
class TestSpider(scrapy.Spider):
name = 'test'
start_urls = ['https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx']
custom_settings = {
'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
'DOWNLOAD_DELAY': 1,
'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}
def parse(self, response):
books = response.xpath("//div[@class='list-group']//@href").extract()
for book in books:
url = response.urljoin(book)
if url.endswith('.ro') or url.endswith('.ro/'):
continue
yield Request(url, callback=self.parse_book)
def __init__(self):
self.driver = webdriver.Chrome('C:\Program Files (x86)\chromedriver.exe')
def parse_book(self, response):
title=response.xpath("//span[@id='HeadingContent_lblTitle']//text()").get()
d1=response.xpath("//div[@class='col-md-10']//p[1]//text()").get()
d1=d1.strip()
d2=response.xpath("//div[@class='col-md-10']//p[2]//text()").get()
d2=d2.strip()
d3=response.xpath("//div[@class='col-md-10']//p[3]//span//text()").get()
d3=d3.strip()
d4=response.xpath("//div[@class='col-md-10']//p[4]//text()").get()
d4=d4.strip()
yield{
"title1":title,
"title2":d1,
"title3":d2,
"title4":d3,
"title5":d4,
}
self.driver.get(response.url)
while True:
next = self.driver.find_element_by_xpath("//a[@id='MainContent_PagerTop_NavNext']")
try:
next.click()
# get the data and write it to scrapy items
except:
break
yield response.follow(next, callback = self.parse)
错误
Traceback (most recent call last):
File "e:\python39\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
yield next(it)
File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
return next(self.data)
File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
return next(self.data)
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 342, in <genexpr>
return (_set_referer(r) for r in result or ())
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 40, in <genexpr>
return (r for r in result or () if _filter(r))
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "C:\Users\Dell\sample\sample\spiders\test.py", line 64, in parse_book
yield response.follow(next, callback = self.parse)
File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 169, in follow
return super().follow(
File "e:\python39\lib\site-packages\scrapy\http\response\__init__.py", line 155, in follow
url = self.urljoin(url)
File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 102, in urljoin
return urljoin(get_base_url(self), url)
File "e:\python39\lib\urllib\parse.py", line 532, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "e:\python39\lib\urllib\parse.py", line 125, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
They show me these error that Cannot mix str and non-str arguments
I'm writing some scraping codes and experiencing an error as above. My code is following.However, upon running the script through the cmd. I am getting errors. These errors suggest that I cannot mix str and non-str arguments and I am confused over how to deal with this problem. Any help would be appreciated.
import scrapy
from scrapy.http import Request
from selenium import webdriver
class TestSpider(scrapy.Spider):
name = 'test'
start_urls = ['https://www.ifep.ro/justice/lawyers/lawyerspanel.aspx']
custom_settings = {
'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
'DOWNLOAD_DELAY': 1,
'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}
def parse(self, response):
books = response.xpath("//div[@class='list-group']//@href").extract()
for book in books:
url = response.urljoin(book)
if url.endswith('.ro') or url.endswith('.ro/'):
continue
yield Request(url, callback=self.parse_book)
def __init__(self):
self.driver = webdriver.Chrome('C:\Program Files (x86)\chromedriver.exe')
def parse_book(self, response):
title=response.xpath("//span[@id='HeadingContent_lblTitle']//text()").get()
d1=response.xpath("//div[@class='col-md-10']//p[1]//text()").get()
d1=d1.strip()
d2=response.xpath("//div[@class='col-md-10']//p[2]//text()").get()
d2=d2.strip()
d3=response.xpath("//div[@class='col-md-10']//p[3]//span//text()").get()
d3=d3.strip()
d4=response.xpath("//div[@class='col-md-10']//p[4]//text()").get()
d4=d4.strip()
yield{
"title1":title,
"title2":d1,
"title3":d2,
"title4":d3,
"title5":d4,
}
self.driver.get(response.url)
while True:
next = self.driver.find_element_by_xpath("//a[@id='MainContent_PagerTop_NavNext']")
try:
next.click()
# get the data and write it to scrapy items
except:
break
yield response.follow(next, callback = self.parse)
Error
Traceback (most recent call last):
File "e:\python39\lib\site-packages\scrapy\utils\defer.py", line 120, in iter_errback
yield next(it)
File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
return next(self.data)
File "e:\python39\lib\site-packages\scrapy\utils\python.py", line 353, in __next__
return next(self.data)
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
for x in result:
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 342, in <genexpr>
return (_set_referer(r) for r in result or ())
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 40, in <genexpr>
return (r for r in result or () if _filter(r))
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "e:\python39\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "e:\python39\lib\site-packages\scrapy\core\spidermw.py", line 56, in _evaluate_iterable
for r in iterable:
File "C:\Users\Dell\sample\sample\spiders\test.py", line 64, in parse_book
yield response.follow(next, callback = self.parse)
File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 169, in follow
return super().follow(
File "e:\python39\lib\site-packages\scrapy\http\response\__init__.py", line 155, in follow
url = self.urljoin(url)
File "e:\python39\lib\site-packages\scrapy\http\response\text.py", line 102, in urljoin
return urljoin(get_base_url(self), url)
File "e:\python39\lib\urllib\parse.py", line 532, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "e:\python39\lib\urllib\parse.py", line 125, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论