Tweets from multiple users

Posted on 2025-01-10 06:56:36

I found the code below on GitHub, which pulls tweets from multiple users at a time.

from pandas import DataFrame
from tweepy import Cursor
import numpy as np
import tweepy
import os
# Using config (python-decouple); required for the config(...) calls below
from decouple import config
# Using dotenv
#from dotenv import load_dotenv
#load_dotenv()
# Using os/env.py
# import env
# Authentication and connection to Twitter API.
# Using config
consumer_key = config("consumer_key")
consumer_secret = config("consumer_secret")
access_token = config("access_token")
access_token_secret = config("access_token_secret")
# Using dotenv/env.py
#consumer_key = 'XXXXXXXXXXXXXXX'
#consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Note: wait_on_rate_limit_notify and tweepy.TweepError (used below) exist only in
# Tweepy 3.x; both were removed in Tweepy 4.0
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
def tweets_extractor(usernames: list):
    TWEETS_PER_QUERY = 10
    MAX_ID = -1
    # Give the user some feedback that the script is running
    print("Tweets Extractor is starting")
    # extract tweets from timeline of targeted politicians of the major political parties
    try:
        # loop through all the users and extract tweets from their relative timelines
        for username in usernames:
            print("Downloading %s's tweets:" % username)
            # Declare API call function
            tweets = api.user_timeline(screen_name=username,
                                       # 200 is the maximum allowed count
                                       count=TWEETS_PER_QUERY,
                                       exclude_replies=True,
                                       include_rts=True,
                                       # Necessary to keep full_text
                                       # otherwise only the first 140 characters are extracted
                                       tweet_mode='extended'
                                       )
            # Guard against an empty timeline, which would otherwise raise an
            # IndexError on the tweets[MAX_ID] lookup below
            if not tweets:
                print("No tweets found for %s, skipping." % username)
                continue
            all_tweets = []
            all_tweets.extend(tweets)
            oldest_id = tweets[MAX_ID].id
            while True:
                # Declare API call function
                tweets = api.user_timeline(screen_name=username,
                                           # 200 is the maximum allowed count
                                           count=TWEETS_PER_QUERY,
                                           max_id=oldest_id - 1,
                                           exclude_replies=True,
                                           include_rts=False,
                                           # Necessary to keep full_text
                                           # otherwise only the first 140 characters are extracted
                                           tweet_mode='extended'
                                           )
                if len(tweets) == 0:
                    break
                oldest_id = tweets[MAX_ID].id
                all_tweets.extend(tweets)
                print("Number of {0}'s tweets downloaded so far: {1}".format(
                    username, len(all_tweets)))
            # Transform the tweepy tweets into a 2D array that will populate the csv
            outtweets = [[
                tweet.id_str,
                tweet.created_at,
                tweet.lang,
                tweet.is_quote_status,
                # Raw tweets
                tweet.full_text.encode(
                    "utf-8").decode("utf-8")
            ]
                for tweet in all_tweets]
            # df = DataFrame(outtweets, columns=[
            #                "ID", "Date Created", "Text"])
            df = DataFrame(outtweets, columns=[
                "ID", "Date Created", "Lang", "Quote Status", "Text"])
            # Remove any rows with empty strings
            df.replace(r'^\s*$', np.nan, inplace=True, regex=True)
            df.dropna(how="any", axis=0, inplace=True)
            # Create the output directory first, otherwise to_csv raises FileNotFoundError
            os.makedirs('csv/08_10_2021', exist_ok=True)
            df.to_csv('csv/08_10_2021/%s_tweets.csv' %
                      username, index=False)
            print("Raw number of {0}'s tweets collected: {1}".format(
                username, len(all_tweets)))
            print("Filtered number of {0}'s tweets written to CSV: {1}\n".format(
                username, len(df.index)))
    except tweepy.TweepError as e:
        print("There was an error, find details below, else check your internet connection or your " +
              " credentials in the credentials.py file \n")
        print("If this is not your first time running this particular script, then there is a possibility that the "
              "maximum rate limit has been exceeded. wait a few more minutes and re run the script.\n")
        print(f"Error Details: {str(e)}")
tweets_extractor(usernames=[
    'pseudo_tazia',
    'friedsamyang',
    # 'KeertiWasHere',
    # 'ahmadahlami99',
    # 'ShazrielYusoff',
    # 'ejenneo',
    # 'jaspreetaulakh_',
    # 'hlminr',
    # 'allirmzni',
    # 'luqmnhdri',
    # 'jimmimiji',
    # 'falan4j',
    # 'syawiers',
    # 'nyenyeljemen',
])

I was getting a module not found error while authenticating the credentials using dotenv/env.py, so I used the config method to authenticate instead (the credentials are saved in a file called config.py).

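For reference, here is a minimal sketch of how I wired the credentials from my config.py (a plain Python module; the variable names and the verify_credentials() check are just how I set it up, not part of the GitHub snippet). With a plain module the variables are imported directly, whereas the config("...") calls in the snippet belong to python-decouple:

# config.py holds plain assignments, e.g.
#   consumer_key = 'XXXXXXXXXXXXXXX'
#   consumer_secret = 'XXXXXXXXXXXXXXXXXXX'
#   access_token = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
#   access_token_secret = 'XXXXXXXXXXXXXXXXXXXX'
import tweepy
from config import (consumer_key, consumer_secret,
                    access_token, access_token_secret)

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Fails immediately if the keys are wrong, which helps tell an
# authentication problem apart from a bug elsewhere in the script
api.verify_credentials()
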
I am getting the error shown in the attached screenshot:

Authentication error

Secondly: is there a way to specify a date range for pulling tweets rather than specifying the number of tweets to pull?

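What I have in mind is something like the sketch below: keep paging through the timeline with Cursor and keep only the tweets whose created_at falls inside the window, stopping once the tweets get older than the start date. This is only a rough idea (the start_date/end_date names and the tweets_in_range helper are mine, and I am assuming Tweepy 3.x, where created_at is a naive UTC datetime), not something from the original script:

from datetime import datetime

import tweepy
from tweepy import Cursor

# Hypothetical window; these names are not part of the original script
start_date = datetime(2021, 8, 1)   # oldest tweet to keep (UTC)
end_date = datetime(2021, 10, 8)    # newest tweet to keep (UTC)

def tweets_in_range(api, username):
    collected = []
    # Cursor handles the max_id pagination; the timeline comes back newest first
    for tweet in Cursor(api.user_timeline,
                        screen_name=username,
                        count=200,
                        exclude_replies=True,
                        include_rts=False,
                        tweet_mode='extended').items():
        if tweet.created_at > end_date:
            continue   # still newer than the window, keep paging backwards
        if tweet.created_at < start_date:
            break      # everything from here on is even older, stop
        collected.append(tweet)
    return collected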
