我得到 json.decoder.JSONDecodeError: 期望用双引号括起来的属性名称:第 1 行第 2 列(字符 1)
import json
import re
import scrapy
import ast
class Scraper(scrapy.spiders.Spider):
    """Spider that scrapes a single page with CSS selectors supplied at launch.

    Spider arguments (passed on the command line with ``-a name=value``):

    * ``page``: URL of the page to request.
    * ``config``: JSON object mapping an output field name to a CSS selector,
      e.g. ``{"Nom": ".table_taille td > b::text"}``.
    * ``mandatory`` (optional): comma-separated field names that must be
      non-empty for the scraped item to be emitted.
    """

    name = 'scraper'

    def __init__(self, page=None, config=None, *args, **kwargs):
        """Store the target page, the parsed selector config and mandatory fields.

        Raises:
            ValueError: if ``config`` is missing, cannot be parsed, or does not
                describe a mapping (``json.JSONDecodeError`` is a subclass).
        """
        # Read from kwargs so the positional signature stays unchanged.
        mandatory = kwargs.pop('mandatory', None)
        super().__init__(*args, **kwargs)
        self.page = page
        self.config = self._parse_config(config)
        # An absent or empty ``mandatory`` argument means no field is required.
        self.mandatory_fields = [
            field.strip()
            for field in (mandatory or '').split(',')
            if field.strip()
        ]

    @staticmethod
    def _parse_config(raw):
        """Return the ``config`` spider argument as a dict of field -> selector."""
        if raw is None:
            raise ValueError(
                'Missing spider argument "config": expected a JSON object '
                'mapping field names to CSS selectors'
            )
        if isinstance(raw, dict):
            # Already parsed, e.g. when the spider is started from Python code.
            return dict(raw)
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as json_err:
            # Some shells swap or strip the double quotes before the value
            # reaches Python; accept a Python-style dict literal as a fallback.
            # literal_eval only evaluates literals, never arbitrary code.
            try:
                parsed = ast.literal_eval(raw)
            except (ValueError, TypeError, SyntaxError):
                # Keep the original exception type, but show what was actually
                # received so quoting problems are visible at a glance.
                raise json.JSONDecodeError(
                    '%s (config received: %r)' % (json_err.msg, raw),
                    json_err.doc,
                    json_err.pos,
                ) from json_err
        if not isinstance(parsed, dict):
            raise ValueError(
                'Spider argument "config" must describe an object/dict, got %s'
                % type(parsed).__name__
            )
        return parsed

    def start_requests(self):
        """Request the configured page and hand the response to ``parse``."""
        self.logger.info('Start url: %s', self.page)
        yield scrapy.Request(url=self.page, callback=self.parse)

    def parse(self, response):
        """Extract every configured field and yield the item if it is complete."""
        item = {'url': response.url}
        for key, selector in self.config.items():
            self.logger.debug('Extracting %r with selector %r', key, selector)
            res = response.css(selector).extract()
            # Fields whose name mentions "url" are normalised to absolute URLs.
            if 'url' in key.lower():
                res = self.get_absolute_url(response, res)
            item[key] = ' '.join(res).strip()

        # Discard the scrape when any mandatory field is missing or empty.
        if all(item.get(field) for field in self.mandatory_fields):
            yield {'data': item}

    @staticmethod
    def get_absolute_url(response, urls):
        """Return ``urls`` with every entry not starting with "http" joined to the response URL."""
        return [
            url if url.startswith('http') else response.urljoin(url)
            for url in urls
        ]
我收到此错误: json.decoder.JSONDecodeError:期望用双引号括起来的属性名称:第1行第2列(字符1)
我将 CSS 选择器作为参数通过 config 传递:scrapy crawl scraper -a page=appeloffres.com/appels-offres/telecom -a config='{"Nom":".table_taille td > b::text","des":".desc_text b::text"}'
错误发生在执行 self.config = json.loads(config) 时。有什么解决办法吗?
import json
import re
import scrapy
import ast
class Scraper(scrapy.spiders.Spider):
    """Spider that scrapes a single page with CSS selectors supplied at launch.

    Spider arguments (passed on the command line with ``-a name=value``):

    * ``page``: URL of the page to request.
    * ``config``: JSON object mapping an output field name to a CSS selector,
      e.g. ``{"Nom": ".table_taille td > b::text"}``.
    * ``mandatory`` (optional): comma-separated field names that must be
      non-empty for the scraped item to be emitted.
    """

    name = 'scraper'

    def __init__(self, page=None, config=None, *args, **kwargs):
        """Store the target page, the parsed selector config and mandatory fields.

        Raises:
            ValueError: if ``config`` is missing, cannot be parsed, or does not
                describe a mapping (``json.JSONDecodeError`` is a subclass).
        """
        # Read from kwargs so the positional signature stays unchanged.
        mandatory = kwargs.pop('mandatory', None)
        super().__init__(*args, **kwargs)
        self.page = page
        self.config = self._parse_config(config)
        # An absent or empty ``mandatory`` argument means no field is required.
        self.mandatory_fields = [
            field.strip()
            for field in (mandatory or '').split(',')
            if field.strip()
        ]

    @staticmethod
    def _parse_config(raw):
        """Return the ``config`` spider argument as a dict of field -> selector."""
        if raw is None:
            raise ValueError(
                'Missing spider argument "config": expected a JSON object '
                'mapping field names to CSS selectors'
            )
        if isinstance(raw, dict):
            # Already parsed, e.g. when the spider is started from Python code.
            return dict(raw)
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as json_err:
            # Some shells swap or strip the double quotes before the value
            # reaches Python; accept a Python-style dict literal as a fallback.
            # literal_eval only evaluates literals, never arbitrary code.
            try:
                parsed = ast.literal_eval(raw)
            except (ValueError, TypeError, SyntaxError):
                # Keep the original exception type, but show what was actually
                # received so quoting problems are visible at a glance.
                raise json.JSONDecodeError(
                    '%s (config received: %r)' % (json_err.msg, raw),
                    json_err.doc,
                    json_err.pos,
                ) from json_err
        if not isinstance(parsed, dict):
            raise ValueError(
                'Spider argument "config" must describe an object/dict, got %s'
                % type(parsed).__name__
            )
        return parsed

    def start_requests(self):
        """Request the configured page and hand the response to ``parse``."""
        self.logger.info('Start url: %s', self.page)
        yield scrapy.Request(url=self.page, callback=self.parse)

    def parse(self, response):
        """Extract every configured field and yield the item if it is complete."""
        item = {'url': response.url}
        for key, selector in self.config.items():
            self.logger.debug('Extracting %r with selector %r', key, selector)
            res = response.css(selector).extract()
            # Fields whose name mentions "url" are normalised to absolute URLs.
            if 'url' in key.lower():
                res = self.get_absolute_url(response, res)
            item[key] = ' '.join(res).strip()

        # Discard the scrape when any mandatory field is missing or empty.
        if all(item.get(field) for field in self.mandatory_fields):
            yield {'data': item}

    @staticmethod
    def get_absolute_url(response, urls):
        """Return ``urls`` with every entry not starting with "http" joined to the response URL."""
        return [
            url if url.startswith('http') else response.urljoin(url)
            for url in urls
        ]
I'm getting this error:
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
I'm passing CSS selectors as an argument in config: scrapy crawl scraper -a page=appeloffres.com/appels-offres/telecom -a config='{"Nom":".table_taille td > b::text","des":".desc_text b::text"}'
The error occurs when I do: self.config = json.loads(config)
Is there any solution?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
这意味着您正在尝试把一个 loads 方法无法转换的变量转换为字典。json.loads() 将字符串转换为字典。
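例如:json.loads('{"a": 1}') 会返回 {'a': 1}。
这就是 json.loads 的工作原理,即将字符串转换为字典。
但是,如果您尝试这样的操作:json.loads("{'a': 1}"),就会抛出 JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)。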
这意味着并非每个字符串都可以转换为字典对象。您应该调试并仔细检查 config 变量保存的内容。
This means that you are trying to convert a variable into a dict that the loads method cannot convert. json.loads() converts a string into a dictionary.
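For example: json.loads('{"a": 1}') returns {'a': 1}.
This is how json.loads works, i.e., converting a string to a dict.
However, if you try something like json.loads("{'a': 1}"), it raises JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1).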
This means that not every string can be converted into a dictionary object. You should debug and carefully check what the config variable holds.