简单的 Web Scraper 不返回数据

发布于 2025-01-11 14:33:58 字数 1216 浏览 0 评论 0原文

我试图从网页中抓取数据，但它返回 [“F”] [“F”]，如果没有检索到数据，它应该这样做。请参阅下面的代码

import pandas as pd
import datetime
import requests
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup

def web_content_div(web_content, class_path):
    web_content_div = web_content.find_all('div', {"class": class_path})
    try:
        spans = web_content_div[0].find_all('span')
        texts =[span.get_text() for span in spans]
    except IndexError:
        texts=[]
        return texts

def real_time_price(stock_code):
    url = 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code + '%27&.tsrc=fin-srch'
    # 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code + '&.tsrc=fin-srch'
    try:
        r = requests.get(url)
        web_content = BeautifulSoup(r.text, 'lxml')
        texts = web_content_div(web_content, 'My(6px) Pos(r) smarthphone_Mt(6px) W(100&%')
        if texts != []:
           price, change = texts[0], texts[1] 
        else:
            price, change = ["F"], ["F"]
    except ConnectionError:
        price, change = [""], [""]

    return price, change
    

Stock = ["BRK-B"]

print(real_time_price("BRK-B"))`

原文

Im trying to scrape data from a webpage but its returning ["F"] ["F"] which is what it should do if no data has been retrieved. Please see Code below

import pandas as pd
import datetime
import requests
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup

def web_content_div(web_content, class_path):
    web_content_div = web_content.find_all('div', {"class": class_path})
    try:
        spans = web_content_div[0].find_all('span')
        texts =[span.get_text() for span in spans]
    except IndexError:
        texts=[]
        return texts

def real_time_price(stock_code):
    url = 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code + '%27&.tsrc=fin-srch'
    # 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code + '&.tsrc=fin-srch'
    try:
        r = requests.get(url)
        web_content = BeautifulSoup(r.text, 'lxml')
        texts = web_content_div(web_content, 'My(6px) Pos(r) smarthphone_Mt(6px) W(100&%')
        if texts != []:
           price, change = texts[0], texts[1] 
        else:
            price, change = ["F"], ["F"]
    except ConnectionError:
        price, change = [""], [""]

    return price, change
    

Stock = ["BRK-B"]

print(real_time_price("BRK-B"))`

分享到QQ

分享到微博