Tweets from multiple users

Posted on 2025-01-10 06:56:36

I found the code below on GitHub, which pulls tweets from multiple users at a time.

from pandas import DataFrame
from tweepy import Cursor
import numpy as np
import tweepy
import os
# Using config (python-decouple); required for the config(...) calls below
from decouple import config
# Using dotenv
#from dotenv import load_dotenv
#load_dotenv()
# Using os/env.py
# import env
# Authentication and connection to Twitter API.
# Using config
consumer_key = config("consumer_key")
consumer_secret = config("consumer_secret")
access_token = config("access_token")
access_token_secret = config("access_token_secret")
# Using dotenv/env.py
#consumer_key = 'XXXXXXXXXXXXXXX'
#consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Note: wait_on_rate_limit_notify and tweepy.TweepError (used below) exist only in
# Tweepy 3.x; both were removed in Tweepy 4.0
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
def tweets_extractor(usernames: list):
    TWEETS_PER_QUERY = 10
    MAX_ID = -1
    # Give the user some feedback that the script is running
    print("Tweets Extractor is starting")
    # extract tweets from timeline of targeted politicians of the major political parties
    try:
        # loop through all the users and extract tweets from their relative timelines
        for username in usernames:
            print("Downloading %s's tweets:" % username)
            # Declare API call function
            tweets = api.user_timeline(screen_name=username,
                                       # 200 is the maximum allowed count
                                       count=TWEETS_PER_QUERY,
                                       exclude_replies=True,
                                       include_rts=True,
                                       # Necessary to keep full_text
                                       # otherwise only the first 140 characters are extracted
                                       tweet_mode='extended'
                                       )
            # Guard against an empty timeline, which would otherwise raise an
            # IndexError on the tweets[MAX_ID] lookup below
            if not tweets:
                print("No tweets found for %s, skipping." % username)
                continue
            all_tweets = []
            all_tweets.extend(tweets)
            oldest_id = tweets[MAX_ID].id
            while True:
                # Declare API call function
                tweets = api.user_timeline(screen_name=username,
                                           # 200 is the maximum allowed count
                                           count=TWEETS_PER_QUERY,
                                           max_id=oldest_id - 1,
                                           exclude_replies=True,
                                           include_rts=False,
                                           # Necessary to keep full_text
                                           # otherwise only the first 140 characters are extracted
                                           tweet_mode='extended'
                                           )
                if len(tweets) == 0:
                    break
                oldest_id = tweets[MAX_ID].id
                all_tweets.extend(tweets)
                print("Number of {0}'s tweets downloaded so far: {1}".format(
                    username, len(all_tweets)))
            # Transform the tweepy tweets into a 2D array that will populate the csv
            outtweets = [[
                tweet.id_str,
                tweet.created_at,
                tweet.lang,
                tweet.is_quote_status,
                # Raw tweets
                tweet.full_text.encode(
                    "utf-8").decode("utf-8")
            ]
                for tweet in all_tweets]
            # df = DataFrame(outtweets, columns=[
            #                "ID", "Date Created", "Text"])
            df = DataFrame(outtweets, columns=[
                "ID", "Date Created", "Lang", "Quote Status", "Text"])
            # Remove any rows with empty strings
            df.replace(r'^\s*$', np.nan, inplace=True, regex=True)
            df.dropna(how="any", axis=0, inplace=True)
            # Create the output directory first, otherwise to_csv raises FileNotFoundError
            os.makedirs('csv/08_10_2021', exist_ok=True)
            df.to_csv('csv/08_10_2021/%s_tweets.csv' %
                      username, index=False)
            print("Raw number of {0}'s tweets collected: {1}".format(
                username, len(all_tweets)))
            print("Filtered number of {0}'s tweets written to CSV: {1}\n".format(
                username, len(df.index)))
    except tweepy.TweepError as e:
        print("There was an error, find details below, else check your internet connection or your " +
              " credentials in the credentials.py file \n")
        print("If this is not your first time running this particular script, then there is a possibility that the "
              "maximum rate limit has been exceeded. wait a few more minutes and re run the script.\n")
        print(f"Error Details: {str(e)}")
tweets_extractor(usernames=[
    'pseudo_tazia',
    'friedsamyang',
    # 'KeertiWasHere',
    # 'ahmadahlami99',
    # 'ShazrielYusoff',
    # 'ejenneo',
    # 'jaspreetaulakh_',
    # 'hlminr',
    # 'allirmzni',
    # 'luqmnhdri',
    # 'jimmimiji',
    # 'falan4j',
    # 'syawiers',
    # 'nyenyeljemen',
])

I was getting a module not found error while authenticating the credentials using dotenv/env.py, so I used the config method to authenticate instead (the credentials are saved in a file called config.py).

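For reference, here is a minimal sketch of how I wired the credentials from my config.py (a plain Python module; the variable names and the verify_credentials() check are just how I set it up, not part of the GitHub snippet). With a plain module the variables are imported directly, whereas the config("...") calls in the snippet belong to python-decouple:

# config.py holds plain assignments, e.g.
#   consumer_key = 'XXXXXXXXXXXXXXX'
#   consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#   access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#   access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
import tweepy
from config import (consumer_key, consumer_secret,
                    access_token, access_token_secret)

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Fails immediately if the keys are wrong, which helps tell an
# authentication problem apart from a bug elsewhere in the script
api.verify_credentials()
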
I am getting the error shown in the attached screenshot:

Authentication error

Secondly: is there a way to specify a date range for pulling tweets rather than specifying the number of tweets to pull?

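What I have in mind is something like the sketch below: keep paging through the timeline with Cursor and keep only the tweets whose created_at falls inside the window, stopping once the tweets get older than the start date. This is only a rough idea (the start_date/end_date names and the tweets_in_range helper are mine, and I am assuming Tweepy 3.x, where created_at is a naive UTC datetime), not something from the original script:

from datetime import datetime

import tweepy
from tweepy import Cursor

# Hypothetical window; these names are not part of the original script
start_date = datetime(2021, 8, 1)   # oldest tweet to keep (UTC)
end_date = datetime(2021, 10, 8)    # newest tweet to keep (UTC)

def tweets_in_range(api, username):
    collected = []
    # Cursor handles the max_id pagination; the timeline comes back newest first
    for tweet in Cursor(api.user_timeline,
                        screen_name=username,
                        count=200,
                        exclude_replies=True,
                        include_rts=False,
                        tweet_mode='extended').items():
        if tweet.created_at > end_date:
            continue   # still newer than the window, keep paging backwards
        if tweet.created_at < start_date:
            break      # everything from here on is even older, stop
        collected.append(tweet)
    return collected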
