Python 编码问题

发布于 2024-10-28 16:24:09 字数 4483 浏览 0 评论 0原文

我对 Python 相当陌生，并且在编码方面遇到了一些问题。

请查看代码：

# -*- coding: utf-8 -*-
import config   # Ficheiro de configuracao
import twitter
import random
import sqlite3
import time
import bitly_api #https://github.com/bitly/bitly-api-python
import feedparser

class TwitterC:
    """Post status updates to Twitter and log each posted text to SQLite.

    Written for Python 2 (note the ``print`` statement at the end of
    ``updateTwitterStatus``). Credentials are read from the ``config`` module.
    """

    def logToDatabase(self, tweet, timestamp):
        """Insert one (tweet, timestamp) row into the ``twitter`` table.

        Opens ``database.db``, creates the table if it does not exist yet,
        inserts the row with a parameterized query, commits and closes.
        There is no try/finally, so the connection is not closed explicitly
        if one of the ``execute`` calls raises.
        """
        # Will log to the database
        database      = sqlite3.connect('database.db') # Create a database file
        cursor        = database.cursor() # Create a cursor
        # NOTE(review): SQLite spells the keyword AUTOINCREMENT; with
        # "INTEGER AUTO_INCREMENT" the column is presumably not treated as an
        # auto-assigned row id -- confirm against the SQLite documentation.
        cursor.execute("CREATE TABLE IF NOT EXISTS twitter(id_tweet INTEGER AUTO_INCREMENT PRIMARY KEY, tweet TEXT, timestamp TEXT);") # Make a table
        # Assign the values for the insert into
        msg_ins       = tweet
        timestamp_ins = timestamp
        values        = [msg_ins, timestamp_ins]
        # Insert data into the table
        cursor.execute("INSERT INTO twitter(tweet, timestamp) VALUES(?, ?)", values)
        database.commit() # Save our changes
        database.close() # Close the connection to the database

    def shortUrl(self, url):
        """Shorten ``url`` through bit.ly and return the API result.

        The caller reads the shortened link from the result's ``'url'`` key.
        """
        bit = bitly_api.Connection(config.bitly_username, config.bitly_key) # Instantiate the API client
        return bit.shorten(url) # Shorten the URL

    def updateTwitterStatus(self, update): 
        """Post ``update`` to Twitter, then log and print the posted text.

        ``update`` is a dict with the keys ``"url"`` (link to shorten and
        append) and ``"msg"`` (the message text). When message + space +
        short URL is longer than 140 characters, the message is cut so that
        message + ``'... '`` + short URL is exactly 140 characters.
        """
        short   = self.shortUrl(update["url"]) # Shorten the URL
        # NOTE(review): this concatenation is the line reported to raise
        # UnicodeDecodeError when update["msg"] contains non-ASCII bytes.
        # Presumably short['url'] is a unicode object, which makes Python 2
        # implicitly decode the byte string as ASCII -- confirm.
        update_str  = update["msg"] + " " + short['url'] # Raw message, before any character-count handling
        # I will see how much characters have the message, if more than 140, delete some chars
        length_message = len(update_str)
        if length_message > 140:
            length_url    = len(short['url'])
            # 136 = 140 minus the 4 characters of the '... ' separator
            count_message = 136 - length_url
            shorten_msg   = update["msg"][0:count_message] + '... '
            update_str    = shorten_msg + short['url']
        # Will post to twitter and print the posted text
        api     = twitter.Api(consumer_key=config.consumer_key, 
                              consumer_secret=config.consumer_secret, 
                              access_token_key=config.access_token_key, 
                              access_token_secret=config.access_token_secret)
        status  = api.PostUpdate(update_str) # Perform the update
        msg     = status.text # Keep the text that was sent in 'msg'
        # Log it to the database
        self.logToDatabase(msg, time.time())
        print msg # Only to show the text that was sent. Comment this line out in the future.

# Basic example
#x = TwitterC()
#x.updateTwitterStatus({"url": "http://xyz.com/?cat=28", "msg": "Some tips about PostgreSQL Administration?"})

# Approach for mixing feed entries with canned phrases:
# pick one of the two sources at random.
p = range(2) # holds 0 and 1
p = random.choice(p)

if p == 0: # TEXT UPDATES chosen
    # Pick one of the configured text updates at random
    text_a_enviar = random.choice(config.text_updates)
    update_to_send = text_a_enviar    
elif p == 1: # FEEDS UPDATES chosen
    # The feed-handling code below is wrapped in a string literal, so it is
    # never executed and this branch currently does nothing.
    '''# Vou escolher um feed ao acaso
    feed_a_enviar = random.choice(config.feeds_updates)
    # Vou apanhar o conteudo do feed
    d = feedparser.parse(feed_a_enviar["feedurl"])
    # Vou definir quantos feeds quero ter no i
    i = range(8)
    # Vou meter para "updates" 10 entradas do feed
    updates = []
    for i in range(8):
        updates.append([{"url": feed_a_enviar["linktoourpage"], "msg": d.entries[i].summary + ", "}])
    # Vou escolher ums entrada ao acaso
    update_to_send = random.choice(updates)'''

# Post to Twitter.
# The call below passes a hard-coded dict; update_to_send chosen above is not
# used. The "msg" literal has no u prefix, so under Python 2 it is a byte
# string containing the non-ASCII character.
x = TwitterC()
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

代码比较长，但问题出在这一行：

x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

该行有一个带有重音符号“à”的字符，这导致了这里的问题：

def updateTwitterStatus(self, update): 
    short   = self.shortUrl(update["url"]) # Vou encurtar o URL
    update_str  = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
    ...

更准确地说，在这一行：

update_str  = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres

错误的输出是这样的：

x.updateTwitterStatus({"url": "http://xyz.com", "msg": "favoritos à distancia"})
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 48: ordinal not in range(128)

关于如何解决这个问题有任何线索吗？

原文

I'm fairly new to Python, and I'm having some problems with encoding.

Please see the code:

# -*- coding: utf-8 -*-
import config   # Ficheiro de configuracao
import twitter
import random
import sqlite3
import time
import bitly_api #https://github.com/bitly/bitly-api-python
import feedparser

class TwitterC:
    """Post status updates to Twitter and log each posted text to SQLite.

    Written for Python 2 (note the ``print`` statement at the end of
    ``updateTwitterStatus``). Credentials are read from the ``config`` module.
    """

    def logToDatabase(self, tweet, timestamp):
        """Insert one (tweet, timestamp) row into the ``twitter`` table.

        Opens ``database.db``, creates the table if it does not exist yet,
        inserts the row with a parameterized query, commits and closes.
        There is no try/finally, so the connection is not closed explicitly
        if one of the ``execute`` calls raises.
        """
        # Will log to the database
        database      = sqlite3.connect('database.db') # Create a database file
        cursor        = database.cursor() # Create a cursor
        # NOTE(review): SQLite spells the keyword AUTOINCREMENT; with
        # "INTEGER AUTO_INCREMENT" the column is presumably not treated as an
        # auto-assigned row id -- confirm against the SQLite documentation.
        cursor.execute("CREATE TABLE IF NOT EXISTS twitter(id_tweet INTEGER AUTO_INCREMENT PRIMARY KEY, tweet TEXT, timestamp TEXT);") # Make a table
        # Assign the values for the insert into
        msg_ins       = tweet
        timestamp_ins = timestamp
        values        = [msg_ins, timestamp_ins]
        # Insert data into the table
        cursor.execute("INSERT INTO twitter(tweet, timestamp) VALUES(?, ?)", values)
        database.commit() # Save our changes
        database.close() # Close the connection to the database

    def shortUrl(self, url):
        """Shorten ``url`` through bit.ly and return the API result.

        The caller reads the shortened link from the result's ``'url'`` key.
        """
        bit = bitly_api.Connection(config.bitly_username, config.bitly_key) # Instantiate the API client
        return bit.shorten(url) # Shorten the URL

    def updateTwitterStatus(self, update): 
        """Post ``update`` to Twitter, then log and print the posted text.

        ``update`` is a dict with the keys ``"url"`` (link to shorten and
        append) and ``"msg"`` (the message text). When message + space +
        short URL is longer than 140 characters, the message is cut so that
        message + ``'... '`` + short URL is exactly 140 characters.
        """
        short   = self.shortUrl(update["url"]) # Shorten the URL
        # NOTE(review): this concatenation is the line reported to raise
        # UnicodeDecodeError when update["msg"] contains non-ASCII bytes.
        # Presumably short['url'] is a unicode object, which makes Python 2
        # implicitly decode the byte string as ASCII -- confirm.
        update_str  = update["msg"] + " " + short['url'] # Raw message, before any character-count handling
        # I will see how much characters have the message, if more than 140, delete some chars
        length_message = len(update_str)
        if length_message > 140:
            length_url    = len(short['url'])
            # 136 = 140 minus the 4 characters of the '... ' separator
            count_message = 136 - length_url
            shorten_msg   = update["msg"][0:count_message] + '... '
            update_str    = shorten_msg + short['url']
        # Will post to twitter and print the posted text
        api     = twitter.Api(consumer_key=config.consumer_key, 
                              consumer_secret=config.consumer_secret, 
                              access_token_key=config.access_token_key, 
                              access_token_secret=config.access_token_secret)
        status  = api.PostUpdate(update_str) # Perform the update
        msg     = status.text # Keep the text that was sent in 'msg'
        # Log it to the database
        self.logToDatabase(msg, time.time())
        print msg # Only to show the text that was sent. Comment this line out in the future.

# Basic example
#x = TwitterC()
#x.updateTwitterStatus({"url": "http://xyz.com/?cat=28", "msg": "Some tips about PostgreSQL Administration?"})

# Approach for mixing feed entries with canned phrases:
# pick one of the two sources at random.
p = range(2) # holds 0 and 1
p = random.choice(p)

if p == 0: # TEXT UPDATES chosen
    # Pick one of the configured text updates at random
    text_a_enviar = random.choice(config.text_updates)
    update_to_send = text_a_enviar    
elif p == 1: # FEEDS UPDATES chosen
    # The feed-handling code below is wrapped in a string literal, so it is
    # never executed and this branch currently does nothing.
    '''# Vou escolher um feed ao acaso
    feed_a_enviar = random.choice(config.feeds_updates)
    # Vou apanhar o conteudo do feed
    d = feedparser.parse(feed_a_enviar["feedurl"])
    # Vou definir quantos feeds quero ter no i
    i = range(8)
    # Vou meter para "updates" 10 entradas do feed
    updates = []
    for i in range(8):
        updates.append([{"url": feed_a_enviar["linktoourpage"], "msg": d.entries[i].summary + ", "}])
    # Vou escolher ums entrada ao acaso
    update_to_send = random.choice(updates)'''

# Post to Twitter.
# The call below passes a hard-coded dict; update_to_send chosen above is not
# used. The "msg" literal has no u prefix, so under Python 2 it is a byte
# string containing the non-ASCII character.
x = TwitterC()
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

The code has quite a few lines, but the problem is in this line:

x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

This line has a character with an accent, "à", and this causes the problem here:

def updateTwitterStatus(self, update): 
    short   = self.shortUrl(update["url"]) # Vou encurtar o URL
    update_str  = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
    ...

More precisely in this line:

update_str  = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres

The output of the error is this:

x.updateTwitterStatus({"url": "http://xyz.com", "msg": "favoritos à distancia"})
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 48: ordinal not in range(128)

Any clues on how to solve this?

分享到QQ

分享到微博