如何使用Python将多个Excel文件转换为CSV UTF-8编码

发布于 2025-02-01 21:09:11 字数 2392 浏览 4 评论 0 原文

我在同一目录中有30+ xlsx 文件，并且使用 python 我想使用UTF-8编码将所有文件转换为CSV，无论文件中是否存在任何编码。我正在使用python的魔术库获取文件名（下面代码）。对于转换，我尝试了so用户julian there （我使用了发布的代码“ InvalidFileException：OpenPyXl不支持文件格式，请检查您可以首先使用Excel打开。代码>。

谢谢您的帮助

# import a library to detect encodings
import magic
import glob

print("File".ljust(45), "Encoding")
for filename in glob.glob('path*.xlsx'):
    with open(filename, 'rb') as rawdata:
        result = magic.from_buffer(rawdata.read(2048))
    print(filename.ljust(45), result)

！

    from openpyxl import load_workbook
    import csv
    from os import sys
    
    def get_all_sheets(excel_file):
        sheets = []
        workbook = load_workbook(excel_file,read_only=True,data_only=True)
        all_worksheets = workbook.get_sheet_names()
        for worksheet_name in all_worksheets:
            sheets.append(worksheet_name)
        return sheets
    
    def csv_from_excel(excel_file, sheets):
        workbook = load_workbook(excel_file,data_only=True)
        for worksheet_name in sheets:
            print("Export " + worksheet_name + " ...")
    
            try:
                worksheet = workbook.get_sheet_by_name(worksheet_name)
            except KeyError:
                print("Could not find " + worksheet_name)
                sys.exit(1)
    
            your_csv_file = open(''.join([worksheet_name,'.csv']), 'wb')
            wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)
            for row in worksheet.iter_rows():
                lrow = []
                for cell in row:
                    lrow.append(cell.value)
                wr.writerow(lrow)
            print(" ... done")
            your_csv_file.close()
    
    if not 2 <= len(sys.argv) <= 3:
        print("Call with " + sys.argv[0] + " <xlxs file> [comma separated list of sheets to export]")
        sys.exit(1)
    else:
        sheets = []
        if len(sys.argv) == 3:
            sheets = list(sys.argv[2].split(','))
        else:
            sheets = get_all_sheets(sys.argv[1])
        assert(sheets != None and len(sheets

) > 0)
    csv_from_excel(sys.argv[1], sheets)

原文

I have 30+ xlsx files in same directory and using python I would like to convert all files to csv with utf-8 encoding, regardless of whatever encoding is present in the file. I am using python's magic library to get the file names (below code).For conversion, I tried the code mention by SO user Julian here (I used the code posted here), but the code is throwing an error saying "InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first. Supported formats are: .xlsx,.xlsm,.xltx,.xltm. Below is the code that is throwing an error.The second issue is based on my limited python knowledge I believe code will work for one excel file. How should I make it work for multiple files ?

Thanks in advance for your help!

# import a library to detect encodings
import magic
import glob

print("File".ljust(45), "Encoding")
for filename in glob.glob('path*.xlsx'):
    with open(filename, 'rb') as rawdata:
        result = magic.from_buffer(rawdata.read(2048))
    print(filename.ljust(45), result)

Code throwing error from SO User github link mentioned here

    from openpyxl import load_workbook
    import csv
    from os import sys
    
    def get_all_sheets(excel_file):
        sheets = []
        workbook = load_workbook(excel_file,read_only=True,data_only=True)
        all_worksheets = workbook.get_sheet_names()
        for worksheet_name in all_worksheets:
            sheets.append(worksheet_name)
        return sheets
    
    def csv_from_excel(excel_file, sheets):
        workbook = load_workbook(excel_file,data_only=True)
        for worksheet_name in sheets:
            print("Export " + worksheet_name + " ...")
    
            try:
                worksheet = workbook.get_sheet_by_name(worksheet_name)
            except KeyError:
                print("Could not find " + worksheet_name)
                sys.exit(1)
    
            your_csv_file = open(''.join([worksheet_name,'.csv']), 'wb')
            wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)
            for row in worksheet.iter_rows():
                lrow = []
                for cell in row:
                    lrow.append(cell.value)
                wr.writerow(lrow)
            print(" ... done")
            your_csv_file.close()
    
    if not 2 <= len(sys.argv) <= 3:
        print("Call with " + sys.argv[0] + " <xlxs file> [comma separated list of sheets to export]")
        sys.exit(1)
    else:
        sheets = []
        if len(sys.argv) == 3:
            sheets = list(sys.argv[2].split(','))
        else:
            sheets = get_all_sheets(sys.argv[1])
        assert(sheets != None and len(sheets

) > 0)
    csv_from_excel(sys.argv[1], sheets)

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

旧梦荧光笔 2025-02-08 21:09:12

您是否尝试使用 pandas 库？您可以使用 OS 将所有文件存储在列表中。然后，您可以使用 read_excel 循环浏览列表，然后打开每个 excel 文件，然后写入 csv 。因此看起来会像这样：

"""Code to read excel workbooks and output each sheet as a csv""" 
""""with utf-8 encoding"""
#Declare a file path where you will store all your excel workbook. You 
#can update the file path for the ExcelPath variable
#Declare a file path where you will store all your csv output. You can 
#update the file path for the CsvPath variable

import pandas as pd
import os

ExcelPath = "C:/ExcelPath" #Store path for your excel workbooks
CsvPath = "C:/CsvPath" #Store path for you csv outputs

fileList = [f for f in os.listdir(ExcelPath)]

for file in fileList:
    xls = pd.ExcelFile(ExcelPath+'/'+file)
    sheets = xls.sheet_names #Get the names of each and loop to create 
                              #individual csv files 
    for sheet in sheets:
        fileNameCSV = str(file)[:-5]+'.'+str(sheet) #declare the csv 
                      #filename which will be excelWorkbook + SheetName
        df = pd.read_excel(ExcelPath+'/'+file, sheet_name = sheet)
        os.chdir(CsvPath)
        df.to_csv("{}.csv".format(fileNameCSV), encoding="utf-8")

不是最好的，但应该满足您的需求

Have you tried to use Pandas library? You can store all the files in a list using os. You can then loop through the list and open each Excel file using read_excel and then write to a csv. So it will look something like this:

"""Code to read excel workbooks and output each sheet as a csv""" 
""""with utf-8 encoding"""
#Declare a file path where you will store all your excel workbook. You 
#can update the file path for the ExcelPath variable
#Declare a file path where you will store all your csv output. You can 
#update the file path for the CsvPath variable

import pandas as pd
import os

ExcelPath = "C:/ExcelPath" #Store path for your excel workbooks
CsvPath = "C:/CsvPath" #Store path for you csv outputs

fileList = [f for f in os.listdir(ExcelPath)]

for file in fileList:
    xls = pd.ExcelFile(ExcelPath+'/'+file)
    sheets = xls.sheet_names #Get the names of each and loop to create 
                              #individual csv files 
    for sheet in sheets:
        fileNameCSV = str(file)[:-5]+'.'+str(sheet) #declare the csv 
                      #filename which will be excelWorkbook + SheetName
        df = pd.read_excel(ExcelPath+'/'+file, sheet_name = sheet)
        os.chdir(CsvPath)
        df.to_csv("{}.csv".format(fileNameCSV), encoding="utf-8")

Not the best but should meet your needs

回复收藏 0 原文

冬天旳寂寞 2025-02-08 21:09:12

首先，第一个错误很明显：
InvalidFileException：OpenPyXl不支持文件格式，请检查您可以首先使用Excel打开它。

Excel是否成功打开了此文件？如果是，我们需要工作簿（或其中一小部分）。

第二个问题的答案：

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vi:ts=4:et

"""I test to open multiple files."""
import csv
from pathlib import Path

from openpyxl import load_workbook

# find all *.xlsx files into current directory
# and iterate over it
for file in Path('.').glob('*.xlsx'):
    # read the Excel file
    wb = load_workbook(file)
    # small test (optional)
    print(file, wb.active.title)
    # export all sheets to CSV
    for sheetname in wb.sheetnames:
        # Write to utf-8 encoded file with BOM signature
        with open(f'{file.stem}-{sheetname}.csv', 'w',
                  encoding="utf-8-sig") as csvfile:
            # Write to CSV
            spamwriter = csv.writer(csvfile)
            # Iterate over rows in sheet
            for row in wb[sheetname].rows:
                # Write a row
                spamwriter.writerow([cell.value for cell in row])

您也可以明确指定 csv的方言 AS 参数。

In first, the first error is obvious:
InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first.

Does Excel successfully open this file? If yes, we need the workbook (or small part of it).

The answer to the second question:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vi:ts=4:et

"""I test to open multiple files."""
import csv
from pathlib import Path

from openpyxl import load_workbook

# find all *.xlsx files into current directory
# and iterate over it
for file in Path('.').glob('*.xlsx'):
    # read the Excel file
    wb = load_workbook(file)
    # small test (optional)
    print(file, wb.active.title)
    # export all sheets to CSV
    for sheetname in wb.sheetnames:
        # Write to utf-8 encoded file with BOM signature
        with open(f'{file.stem}-{sheetname}.csv', 'w',
                  encoding="utf-8-sig") as csvfile:
            # Write to CSV
            spamwriter = csv.writer(csvfile)
            # Iterate over rows in sheet
            for row in wb[sheetname].rows:
                # Write a row
                spamwriter.writerow([cell.value for cell in row])

Also you can explicitly specify the dialect of csv as csv.writer parameter.