用scrapy尝试爬取游戏饰品价格的爬虫数据没有写入到数据库

发布于 2022-09-11 19:42:45 字数 3062 浏览 25 评论 0

用scrapy尝试爬取游戏饰品价格的爬虫，在setting文件配置好的情况下（pipeline=300），似乎pipeline不能连接到数据库（因为连简单的插入语句也不能成功执行），而且pycharm没有报错，有人能帮忙看一下哪里有问题吗？

import pymysql

class DecoratesPipeline(object):

def __init__(self):
    self.conn = pymysql.connect(host="127.0.0.1", user="root", password="123456",
                           db='test_food', charset="utf8")
    # 1.建立数据库的连接
    # self.conn = pymysql.connect(host="127.0.0.1", user="root", passwd="123456", db="test_food", charset="utf8")
    # 2.创建一个游标cursor，用来操作表
    self.cursor = self.conn.cursor()

def process_item(self, item, spider):
    # try:
    #     for j in range(len(item['d_name'])):
    #         d_name = item['d_name'][j]
    #         h_name = item['h_name'][j]
    #         # price=item['price']
    #         sell = item['sell'][j]
    #         buy = item['buy'][j]
    #         # 写入数据库
    #         sql = "INSERT INTO dd(sell,buy) VALUE (%s,%s)", (sell, buy)
    #         # self.conn.query(sql)
    #         self.cursor.execute(sql)
    #         # # 4.提交操作
    #         self.conn.commit()
    # except Exception as error:
    #     # 出现错误打印错误日志
    #     log(error)
    sql2 = 'INSERT INTO USER1(name, age) VALUES ("abcx", 23)'
    self.cursor.execute(sql2)
    self.conn.commit()
    return item
def close_spider(self,spider):
    self.cursor.close()
    self.conn.close()### 问题描述

这个是spiders的程序
import scrapy
from decorates.items import DecoratesItem
from scrapy.http import Request
from urllib import request
import re
import win32api
import urllib

class PpigSpider(scrapy.Spider):

name = 'ppig'
allowed_domains = ['buff.163.com']
#start_urls = ['https://buff.163.com/market/goods?goods_id=1#tab=history']
def start_requests(self):
    yield Request('https://buff.163.com/market/goods?goods_id=1#tab=history',
                   headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36(KHMTL,like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"})
def parse(self, response):
    item = DecoratesItem()
    item['d_name'] = response.xpath('//h1//text()').extract()[0]
    item['h_name'] = response.xpath('//span/a//text()').extract()[2]
    item['sell'] = response.xpath('//li[@class="selling on"]//text()').extract()[0]
    item['buy'] = response.xpath('//a/span[@id="buy_num"]//text()').extract()[0]
    # pat = '[1-9]\d*\.\d\d|0\.\d*[1-9]\d'
    # data = re.compile(pat).findall(str(response.body))
    item['d_price'] = response.xpath('//td//strong[@class="f_Strong"]"]').extract()[0]
    #//td[@class="t_Left"]//strong
    #//strong//text()

    print(item['d_price'])
    # try:
    #items['price'] = re.compile(pat).findall(response + '|$')[0]
    for i in range(2,10):
        nexturl="https://buff.163.com/market/goods?goods_id="+str(i)+"#tab=history"

        yield Request(nexturl,callback=self.parse,headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36(KHMTL,like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"})

分享到QQ

分享到微博