来自多个用户的推文
我在 Github 中找到了以下代码,可以一次提取多个用户的推文。
from pandas import DataFrame
from tweepy import Cursor
import numpy as np
import tweepy
import os
# Using config
# from decouple import config
# Using dotenv
#from dotenv import load_dotenv
#load_dotenv()
# Using os/env.py
# import env
# Authentication and connection to Twitter API.
# Using config
consumer_key = config("consumer_key")
consumer_secret = config("consumer_secret")
access_token = config("access_token")
access_token_secret = config("access_token_secret")
# Using dotenv/env.py
#consumer_key = 'XXXXXXXXXXXXXXX'
#consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
def tweets_extractor(usernames: list):
TWEETS_PER_QUERY = 10
MAX_ID = -1
# Giving the user some feed back that the script is running
print("Tweets Extractor is starting")
# extract tweets from timeline of targeted politicians of the major political parties
try:
# loop through all the users and extract tweets from their relative timelines
for username in usernames:
print("Downloading %s's tweets:" % username)
# Declare API call function
tweets = api.user_timeline(screen_name=username,
# 200 is the maximum allowed count
count=TWEETS_PER_QUERY,
exclude_replies=True,
include_rts=True,
# Necessary to keep full_text
# otherwise only the first 140 words are extracted
tweet_mode='extended'
)
all_tweets = []
all_tweets.extend(tweets)
oldest_id = tweets[MAX_ID].id
while True:
# Declare API call function
tweets = api.user_timeline(screen_name=username,
# 200 is the maximum allowed count
count=TWEETS_PER_QUERY,
max_id=oldest_id - 1,
exclude_replies=True,
include_rts=False,
# Necessary to keep full_text
# otherwise only the first 140 words are extracted
tweet_mode='extended'
)
if len(tweets) == 0:
break
oldest_id = tweets[MAX_ID].id
all_tweets.extend(tweets)
print('N of {0} tweets downloaded till now: {1}'.format(
username, len(all_tweets)))
# Transform the tweepy tweets into a 2D array that will populate the csv
outtweets = [[
tweet.id_str,
tweet.created_at,
tweet.lang,
tweet.is_quote_status,
# Raw tweets
tweet.full_text.encode(
"utf-8").decode("utf-8")
]
for idx, tweet in enumerate(all_tweets)]
# df = DataFrame(outtweets, columns=[
# "ID", "Date Created", "Text"])
df = DataFrame(outtweets, columns=[
"ID", "Date Created", "Lang", "Quote Status", "Text"])
# Remove any rows with empty strings
df.replace(r'^\s*$', np.nan, inplace=True, regex=True)
df.dropna(how="any", axis=0, inplace=True)
df.to_csv('csv/08_10_2021/%s_tweets.csv' %
username, index=False)
print("Raw number of {0}'s tweets collected: {1}".format(
username, len(all_tweets)))
print("Filtered number of {0}'s tweets written to CSV: {1}\n".format(
username, len(df.index)))
except tweepy.TweepError as e:
print("There was an error, find details below, else check your internet connection or your " +
" credentials in the credentials.py file \n")
print("If this is not your first time running this particular script, then there is a possibility that the "
"maximum rate limit has been exceeded. wait a few more minutes and re run the script.\n")
print(f"Error Details: {str(e)}")
tweets_extractor(usernames=[
'pseudo_tazia',
'friedsamyang',
# 'KeertiWasHere',
# 'ahmadahlami99',
# 'ShazrielYusoff',
# 'ejenneo',
# 'jaspreetaulakh_',
# 'hlminr',
# 'allirmzni',
# 'luqmnhdri',
# 'jimmimiji',
# 'falan4j',
# 'syawiers',
# 'nyenyeljemen',
])
我在使用 dotenv/env.py
验证凭据时收到 module not found
错误。因此,我使用 config 方法进行身份验证(将凭据保存在名为 config.py 的文件中)。
我收到附加的屏幕截图错误
其次:有没有办法提到日期范围来拉推文而不是指定要拉的推文数量?
I found the below code in Github to pull multiple users tweets at a time.
from pandas import DataFrame
from tweepy import Cursor
import numpy as np
import tweepy
import os
# Using config
# from decouple import config
# Using dotenv
#from dotenv import load_dotenv
#load_dotenv()
# Using os/env.py
# import env
# Authentication and connection to Twitter API.
# Using config
consumer_key = config("consumer_key")
consumer_secret = config("consumer_secret")
access_token = config("access_token")
access_token_secret = config("access_token_secret")
# Using dotenv/env.py
#consumer_key = 'XXXXXXXXXXXXXXX'
#consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
def tweets_extractor(usernames: list):
TWEETS_PER_QUERY = 10
MAX_ID = -1
# Giving the user some feed back that the script is running
print("Tweets Extractor is starting")
# extract tweets from timeline of targeted politicians of the major political parties
try:
# loop through all the users and extract tweets from their relative timelines
for username in usernames:
print("Downloading %s's tweets:" % username)
# Declare API call function
tweets = api.user_timeline(screen_name=username,
# 200 is the maximum allowed count
count=TWEETS_PER_QUERY,
exclude_replies=True,
include_rts=True,
# Necessary to keep full_text
# otherwise only the first 140 words are extracted
tweet_mode='extended'
)
all_tweets = []
all_tweets.extend(tweets)
oldest_id = tweets[MAX_ID].id
while True:
# Declare API call function
tweets = api.user_timeline(screen_name=username,
# 200 is the maximum allowed count
count=TWEETS_PER_QUERY,
max_id=oldest_id - 1,
exclude_replies=True,
include_rts=False,
# Necessary to keep full_text
# otherwise only the first 140 words are extracted
tweet_mode='extended'
)
if len(tweets) == 0:
break
oldest_id = tweets[MAX_ID].id
all_tweets.extend(tweets)
print('N of {0} tweets downloaded till now: {1}'.format(
username, len(all_tweets)))
# Transform the tweepy tweets into a 2D array that will populate the csv
outtweets = [[
tweet.id_str,
tweet.created_at,
tweet.lang,
tweet.is_quote_status,
# Raw tweets
tweet.full_text.encode(
"utf-8").decode("utf-8")
]
for idx, tweet in enumerate(all_tweets)]
# df = DataFrame(outtweets, columns=[
# "ID", "Date Created", "Text"])
df = DataFrame(outtweets, columns=[
"ID", "Date Created", "Lang", "Quote Status", "Text"])
# Remove any rows with empty strings
df.replace(r'^\s*
I was getting module not found
error while authenticating the credentials using dotenv/env.py
. Hence I used config
method to authenticate (saved the credentials in a file called config.py
).
I am getting the attached screen shot error
Secondly: Is there a way to mention date range to pull tweets rather than specifying the number of tweets to pull?
, np.nan, inplace=True, regex=True)
df.dropna(how="any", axis=0, inplace=True)
df.to_csv('csv/08_10_2021/%s_tweets.csv' %
username, index=False)
print("Raw number of {0}'s tweets collected: {1}".format(
username, len(all_tweets)))
print("Filtered number of {0}'s tweets written to CSV: {1}\n".format(
username, len(df.index)))
except tweepy.TweepError as e:
print("There was an error, find details below, else check your internet connection or your " +
" credentials in the credentials.py file \n")
print("If this is not your first time running this particular script, then there is a possibility that the "
"maximum rate limit has been exceeded. wait a few more minutes and re run the script.\n")
print(f"Error Details: {str(e)}")
tweets_extractor(usernames=[
'pseudo_tazia',
'friedsamyang',
# 'KeertiWasHere',
# 'ahmadahlami99',
# 'ShazrielYusoff',
# 'ejenneo',
# 'jaspreetaulakh_',
# 'hlminr',
# 'allirmzni',
# 'luqmnhdri',
# 'jimmimiji',
# 'falan4j',
# 'syawiers',
# 'nyenyeljemen',
])
I was getting module not found
error while authenticating the credentials using dotenv/env.py
. Hence I used config
method to authenticate (saved the credentials in a file called config.py
).
I am getting the attached screen shot error
Secondly: Is there a way to mention date range to pull tweets rather than specifying the number of tweets to pull?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论