# Attempted extraction of the cover and sidebar headlines (reported in the question as not working).
# NOTE(review): "title absolute serif" is a CSS descendant selector for the *tags*
# <title> -> <absolute> -> <serif>; if these are class names the selector would
# presumably be ".title.absolute.serif" - confirm against the page HTML.
title_cover = " ".join(span.text for span in div.select("title absolute serif"))
# NOTE(review): "bold" likewise matches <bold> tags, not class="bold" - presumably ".bold" was intended; confirm.
title_sidebar = " ".join(span.text for span in div.select("bold"))
# find() returns None when `div` holds no <span class="upper">, in which case `.text` raises AttributeError.
time_cover_and_sidebar = div.find('span', attrs={'class': 'upper'}).text
# Display lines in the " <time> <PLACE>, <title> (TMW)" layout.
news_cover = f" {time_cover_and_sidebar} {place.upper()}, {title_cover} (TMW)"
news_sidebar = f" {time_cover_and_sidebar} {place.upper()}, {title_sidebar} (TMW)"
# NOTE(review): both rows store `news`, not the `news_cover` / `news_sidebar` strings built above - looks unintended; confirm.
results_cover.append( [number, time_cover_and_sidebar, place, title_cover, news, link] )
results_sidebar.append( [number, time_cover_and_sidebar, place, title_sidebar, news, link])
import tkinter as tk # PEP8: `import *` is not preferred
from tkinter import ttk
from tkinter.scrolledtext import ScrolledText
import requests
import requests_cache
from bs4 import BeautifulSoup
import pandas as pd
# PEP8: all imports at the beginning
# --- functions --- # PEP8: all functions directly after imports
def get_data_for(place):
    """Scrape the news list of one team page on tuttomercatoweb.com.

    Args:
        place: Team slug used as the URL path segment (e.g. ``'atalanta'``).
            It is also upper-cased into the formatted display line.

    Returns:
        A list of ``[number, time, place, title, news, link]`` rows, where
        ``number`` is the index of the enclosing ``div.tcc-list-news`` block
        and ``news`` is the formatted line " <time> <PLACE>, <title> (TMW)".

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    results = []

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        f'https://www.tuttomercatoweb.com/{place}/',
        headers=headers,
        timeout=30,
    )
    print('url:', response.url)
    print('status:', response.status_code)
    #print('html:', response.text[:1000])

    soup = BeautifulSoup(response.content, 'html.parser')

    news_blocks = soup.find_all('div', attrs={"class": "tcc-list-news"})

    for number, block in enumerate(news_blocks):
        for div in block.find_all("div"):
            time_tag = div.find('span', attrs={'class': 'hh serif'})
            anchor = div.find('a')
            # find() returns None when the element is absent; skip such divs
            # instead of failing on `.text` / `['href']`.
            if time_tag is None or anchor is None:
                continue
            link = anchor.get('href')
            if link is None:
                continue

            time_text = time_tag.text
            title = " ".join(span.text for span in div.select("a > span"))
            # Separate name: the original rebound `news`, the list being enumerated.
            news_line = f" {time_text} {place.upper()}, {title} (TMW)"
            results.append([number, time_text, place, title, news_line, link])

    return results
def all_titles():
    """Scrape every configured team page and list the headlines.

    Rebinds the module-level ``df`` to a sorted DataFrame of all scraped rows,
    appends a short search log to ``text_download`` and refills
    ``listbox_title`` with one formatted ``news`` line per row.
    """
    global df

    collected = []  # rows gathered from every team page

    for place in ('atalanta', 'bologna'):
        print('search:', place)
        results = get_data_for(place)
        print('found:', len(results))
        collected.extend(results)
        text_download.insert('end', f"search: {place}\nfound: {len(results)}\n")

    column_names = ['number', 'time', 'place', 'title', 'news', 'link']
    sort_keys = ['number', 'time', 'place', 'title']
    sort_ascending = [True, False, True, True]

    df = pd.DataFrame(collected, columns=column_names)
    df = df.sort_values(by=sort_keys, ascending=sort_ascending)
    # Renumber rows so listbox positions line up with `df.iloc` positions.
    df = df.reset_index()

    listbox_title.delete('0', 'end')

    for line in df['news']:
        listbox_title.insert('end', line)
def content(event=None):
    """Download the article behind the selected headline and display it.

    Args:
        event: Tk event object supplied when the function is invoked through
            ``bind``; unused. It defaults to ``None`` because ``command=``
            calls the function without arguments.
    """
    # tuple of selected listbox indices
    selection = listbox_title.curselection()
    print('selection:', selection)

    # Nothing selected, or no titles have been loaded into `df` yet.
    if not selection or df is None:
        return

    row = df.iloc[selection[-1]]
    #print('item:', row)

    url = row['link']
    #print('url:', url)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    # keep page in database `SQLite`
    # https://github.com/reclosedev/requests-cache
    # https://sqlite.org/index.html
    session = requests_cache.CachedSession('titles')
    # A timeout keeps a stalled download from freezing the GUI callback forever.
    response = session.get(url, headers=headers, timeout=30)
    #response = requests.get(url, headers=headers)

    soup = BeautifulSoup(response.content, 'html.parser')

    content_download = "\n".join(tag.get_text() for tag in soup.select("div.text.mbottom"))

    text_download.delete('1.0', 'end')  # remove previous content
    text_download.insert('end', content_download)
# --- main ---
# Module-level state: `all_titles` rebinds `df`, `content` reads it.
df = None

window = tk.Tk()

# ---
# [Tkinter: How to display Listbox with Scrollbar — furas.pl](https://blog.furas.pl/python-tkitner-how-to-display-listbox-with-scrollbar-gb.html)

frame_title = tk.Frame(window)
frame_title.pack(fill='both', expand=True, pady=5, padx=5)

listbox_title = tk.Listbox(frame_title, selectbackground="#960000", selectforeground="white", bg="white")
listbox_title.pack(side='left', fill='both', expand=True)

scrollbar_title = tk.Scrollbar(frame_title)
scrollbar_title.pack(side='left', fill='y')

# Link both directions: the scrollbar drives the listbox, and the listbox
# reports its position back so the slider follows keyboard/mouse-wheel scrolling.
scrollbar_title['command'] = listbox_title.yview
listbox_title['yscrollcommand'] = scrollbar_title.set

listbox_title.bind('<Double-Button-1>', content)  # it executes `content(event)`

# ----

text_download = ScrolledText(window, bg="white")
text_download.pack(fill='both', expand=True, pady=0, padx=5)

# ----

buttons_frame = tk.Frame(window)
# FIX: the frame itself was never packed, so the buttons inside it were never displayed.
buttons_frame.pack()

button1 = tk.Button(buttons_frame, text="View Titles", command=all_titles)  # don't use `[]` to execute functions
button1.pack(side='left', pady=5, padx=5)

button2 = tk.Button(buttons_frame, text="View Content", command=content)  # don't use `[]` to execute functions
button2.pack(side='left', pady=5, padx=(0,5))

# FIX: start the Tk event loop; without it the script ends before the window can be used.
window.mainloop()
I have a window with a button that scrapes the news headlines of a site into a listbox, and another button that scrapes the news content related to the selected headline and displays it in a textbox.
Currently I scrape all the titles on the page, along with their times and contents, but I would also like to add the title (with its content) that is on the cover of the page (this), as well as all the titles (with their contents) that are in the right-hand column of the page (this).
The easy part is that the titles in the right column always have the same unique HTML name, but the time is not directly visible: you have to open the link to get it.
I have tried using these, but without success:
# Attempted extraction of the cover and sidebar headlines (reported in the question as not working).
# NOTE(review): "title absolute serif" is a CSS descendant selector for the *tags*
# <title> -> <absolute> -> <serif>; if these are class names the selector would
# presumably be ".title.absolute.serif" - confirm against the page HTML.
title_cover = " ".join(span.text for span in div.select("title absolute serif"))
# NOTE(review): "bold" likewise matches <bold> tags, not class="bold" - presumably ".bold" was intended; confirm.
title_sidebar = " ".join(span.text for span in div.select("bold"))
# find() returns None when `div` holds no <span class="upper">, in which case `.text` raises AttributeError.
time_cover_and_sidebar = div.find('span', attrs={'class': 'upper'}).text
# Display lines in the " <time> <PLACE>, <title> (TMW)" layout.
news_cover = f" {time_cover_and_sidebar} {place.upper()}, {title_cover} (TMW)"
news_sidebar = f" {time_cover_and_sidebar} {place.upper()}, {title_sidebar} (TMW)"
# NOTE(review): both rows store `news`, not the `news_cover` / `news_sidebar` strings built above - looks unintended; confirm.
results_cover.append( [number, time_cover_and_sidebar, place, title_cover, news, link] )
results_sidebar.append( [number, time_cover_and_sidebar, place, title_sidebar, news, link])
The output I would like to get is the same as what I get with the code already provided (time, place/team name, title), except that I would also like to include the time, titles and content of the cover and sidebar news.
Complete executable code:
import tkinter as tk # PEP8: `import *` is not preferred
from tkinter import ttk
from tkinter.scrolledtext import ScrolledText
import requests
import requests_cache
from bs4 import BeautifulSoup
import pandas as pd
# PEP8: all imports at the beginning
# --- functions --- # PEP8: all functions directly after imports
def get_data_for(place):
    """Scrape the news list of one team page on tuttomercatoweb.com.

    Args:
        place: Team slug used as the URL path segment (e.g. ``'atalanta'``).
            It is also upper-cased into the formatted display line.

    Returns:
        A list of ``[number, time, place, title, news, link]`` rows, where
        ``number`` is the index of the enclosing ``div.tcc-list-news`` block
        and ``news`` is the formatted line " <time> <PLACE>, <title> (TMW)".

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    results = []

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        f'https://www.tuttomercatoweb.com/{place}/',
        headers=headers,
        timeout=30,
    )
    print('url:', response.url)
    print('status:', response.status_code)
    #print('html:', response.text[:1000])

    soup = BeautifulSoup(response.content, 'html.parser')

    news_blocks = soup.find_all('div', attrs={"class": "tcc-list-news"})

    for number, block in enumerate(news_blocks):
        for div in block.find_all("div"):
            time_tag = div.find('span', attrs={'class': 'hh serif'})
            anchor = div.find('a')
            # find() returns None when the element is absent; skip such divs
            # instead of failing on `.text` / `['href']`.
            if time_tag is None or anchor is None:
                continue
            link = anchor.get('href')
            if link is None:
                continue

            time_text = time_tag.text
            title = " ".join(span.text for span in div.select("a > span"))
            # Separate name: the original rebound `news`, the list being enumerated.
            news_line = f" {time_text} {place.upper()}, {title} (TMW)"
            results.append([number, time_text, place, title, news_line, link])

    return results
def all_titles():
    """Scrape every configured team page and list the headlines.

    Rebinds the module-level ``df`` to a sorted DataFrame of all scraped rows,
    appends a short search log to ``text_download`` and refills
    ``listbox_title`` with one formatted ``news`` line per row.
    """
    global df

    collected = []  # rows gathered from every team page

    for place in ('atalanta', 'bologna'):
        print('search:', place)
        results = get_data_for(place)
        print('found:', len(results))
        collected.extend(results)
        text_download.insert('end', f"search: {place}\nfound: {len(results)}\n")

    column_names = ['number', 'time', 'place', 'title', 'news', 'link']
    sort_keys = ['number', 'time', 'place', 'title']
    sort_ascending = [True, False, True, True]

    df = pd.DataFrame(collected, columns=column_names)
    df = df.sort_values(by=sort_keys, ascending=sort_ascending)
    # Renumber rows so listbox positions line up with `df.iloc` positions.
    df = df.reset_index()

    listbox_title.delete('0', 'end')

    for line in df['news']:
        listbox_title.insert('end', line)
def content(event=None):
    """Download the article behind the selected headline and display it.

    Args:
        event: Tk event object supplied when the function is invoked through
            ``bind``; unused. It defaults to ``None`` because ``command=``
            calls the function without arguments.
    """
    # tuple of selected listbox indices
    selection = listbox_title.curselection()
    print('selection:', selection)

    # Nothing selected, or no titles have been loaded into `df` yet.
    if not selection or df is None:
        return

    row = df.iloc[selection[-1]]
    #print('item:', row)

    url = row['link']
    #print('url:', url)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    # keep page in database `SQLite`
    # https://github.com/reclosedev/requests-cache
    # https://sqlite.org/index.html
    session = requests_cache.CachedSession('titles')
    # A timeout keeps a stalled download from freezing the GUI callback forever.
    response = session.get(url, headers=headers, timeout=30)
    #response = requests.get(url, headers=headers)

    soup = BeautifulSoup(response.content, 'html.parser')

    content_download = "\n".join(tag.get_text() for tag in soup.select("div.text.mbottom"))

    text_download.delete('1.0', 'end')  # remove previous content
    text_download.insert('end', content_download)
# --- main ---
# Module-level state: `all_titles` rebinds `df`, `content` reads it.
df = None

window = tk.Tk()

# ---
# [Tkinter: How to display Listbox with Scrollbar — furas.pl](https://blog.furas.pl/python-tkitner-how-to-display-listbox-with-scrollbar-gb.html)

frame_title = tk.Frame(window)
frame_title.pack(fill='both', expand=True, pady=5, padx=5)

listbox_title = tk.Listbox(frame_title, selectbackground="#960000", selectforeground="white", bg="white")
listbox_title.pack(side='left', fill='both', expand=True)

scrollbar_title = tk.Scrollbar(frame_title)
scrollbar_title.pack(side='left', fill='y')

# Link both directions: the scrollbar drives the listbox, and the listbox
# reports its position back so the slider follows keyboard/mouse-wheel scrolling.
scrollbar_title['command'] = listbox_title.yview
listbox_title['yscrollcommand'] = scrollbar_title.set

listbox_title.bind('<Double-Button-1>', content)  # it executes `content(event)`

# ----

text_download = ScrolledText(window, bg="white")
text_download.pack(fill='both', expand=True, pady=0, padx=5)

# ----

buttons_frame = tk.Frame(window)
# FIX: the frame itself was never packed, so the buttons inside it were never displayed.
buttons_frame.pack()

button1 = tk.Button(buttons_frame, text="View Titles", command=all_titles)  # don't use `[]` to execute functions
button1.pack(side='left', pady=5, padx=5)

button2 = tk.Button(buttons_frame, text="View Content", command=content)  # don't use `[]` to execute functions
button2.pack(side='left', pady=5, padx=(0,5))

# FIX: start the Tk event loop; without it the script ends before the window can be used.
window.mainloop()
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

将来,将您的帖子/问题调整为仅包括最少的代码来回答问题。 TKINTER并不是您想在此处修复的一部分。
<script>
标签中提供了一个不错的JSON格式以获取该格式。
In the future, trim down your posts/questions to include only the minimal amount of code needed to answer the question. Tkinter isn't really part of what you want to fix here.
With that being said, I only updated the code pertaining to getting the cover and sidebar news. You are correct: you need to go to each link to get the data. You could parse it from the HTML, or use the nice JSON format that the site provides in its <script> tags.