用scrapy尝试爬取游戏饰品价格的爬虫 数据没有写入到数据库
用scrapy尝试爬取游戏饰品价格的爬虫,在setting文件配置好的情况下(pipeline=300),似乎pipeline不能连接到数据库(因为连简单的插入语句也不能成功执行),而且pycharm没有报错,有人能帮忙看一下哪里有问题吗?
import pymysql
class DecoratesPipeline(object):
def __init__(self):
self.conn = pymysql.connect(host="127.0.0.1", user="root", password="123456",
db='test_food', charset="utf8")
# 1.建立数据库的连接
# self.conn = pymysql.connect(host="127.0.0.1", user="root", passwd="123456", db="test_food", charset="utf8")
# 2.创建一个游标cursor,用来操作表
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# try:
# for j in range(len(item['d_name'])):
# d_name = item['d_name'][j]
# h_name = item['h_name'][j]
# # price=item['price']
# sell = item['sell'][j]
# buy = item['buy'][j]
# # 写入数据库
# sql = "INSERT INTO dd(sell,buy) VALUE (%s,%s)", (sell, buy)
# # self.conn.query(sql)
# self.cursor.execute(sql)
# # # 4.提交操作
# self.conn.commit()
# except Exception as error:
# # 出现错误打印错误日志
# log(error)
sql2 = 'INSERT INTO USER1(name, age) VALUES ("abcx", 23)'
self.cursor.execute(sql2)
self.conn.commit()
return item
def close_spider(self,spider):
self.cursor.close()
self.conn.close()### 问题描述
这个是spiders的程序
import scrapy
from decorates.items import DecoratesItem
from scrapy.http import Request
from urllib import request
import re
import win32api
import urllib
class PpigSpider(scrapy.Spider):
name = 'ppig'
allowed_domains = ['buff.163.com']
#start_urls = ['https://buff.163.com/market/goods?goods_id=1#tab=history']
def start_requests(self):
yield Request('https://buff.163.com/market/goods?goods_id=1#tab=history',
headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36(KHMTL,like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"})
def parse(self, response):
item = DecoratesItem()
item['d_name'] = response.xpath('//h1//text()').extract()[0]
item['h_name'] = response.xpath('//span/a//text()').extract()[2]
item['sell'] = response.xpath('//li[@class="selling on"]//text()').extract()[0]
item['buy'] = response.xpath('//a/span[@id="buy_num"]//text()').extract()[0]
# pat = '[1-9]\d*\.\d\d|0\.\d*[1-9]\d'
# data = re.compile(pat).findall(str(response.body))
item['d_price'] = response.xpath('//td//strong[@class="f_Strong"]"]').extract()[0]
#//td[@class="t_Left"]//strong
#//strong//text()
print(item['d_price'])
# try:
#items['price'] = re.compile(pat).findall(response + '|$')[0]
for i in range(2,10):
nexturl="https://buff.163.com/market/goods?goods_id="+str(i)+"#tab=history"
yield Request(nexturl,callback=self.parse,headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36(KHMTL,like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"})
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论