在 Python 中自动将 CSV 插入 MySQL -cursor.execute() 挂起
我正在尝试自动将 csv 插入到 mysql 数据库中。我在 Python 脚本中创建数据库和表,然后在 Jupyter Notebook 中运行它。但是,由于某种原因,最终的
这是我的 csv_import_functions.py
:
import os
import numpy as np
import pandas as pd
import mysql.connector
def csv_files():
# get names of only csv files
csv_files = []
for file in os.listdir(os.getcwd()):
if file.endswith(".csv"):
csv_files.append(file)
return csv_files
def create_df(csv_files):
data_path = os.getcwd()+'/'
# loop through the files and create the dataframe
df = {}
for file in csv_files:
try:
df[file] = pd.read_csv(data_path+file)
except UnicodeDecodeError:
# if utf-8 encoding error
df[file] = pd.read_csv(data_path+file, encoding="ISO-8859-1")
print(file)
return df
def clean_tbl_name(filename):
# rename csv, force lower case, no spaces, no dashes
clean_tbl_name = filename.lower().replace(" ", "").replace(
"-", "_").replace(r"/", "_").replace("\\", "_").replace("$", "").replace("%", "")
tbl_name = '{0}'.format(clean_tbl_name.split('.')[0])
return tbl_name
def clean_colname(dataframe):
# force column names to be lower case, no spaces, no dashes
dataframe.columns = [x.lower().replace(" ", "_").replace("-", "_").replace(r"/", "_").replace(
"\\", "_").replace(".", "_").replace("$", "").replace("%", "") for x in dataframe.columns]
# processing data
replacements = {
'timedelta64[ns]': 'varchar(100)',
'object': 'varchar(100)',
'float64': 'float',
'int64': 'int',
'datetime64': 'timestamp'
}
col_str = ", ".join("{} {}".format(n, d) for (n, d) in zip(
dataframe.columns, dataframe.dtypes.replace(replacements)))
return col_str, dataframe.columns
def upload_to_db(host, database, user, password, tbl_name, col_str, file, dataframe, dataframe_columns):
conn_string = "host=%s, database=%s, user=%s, password=%s, port=%s" % (
host, database, user, password)
print("string is: " + conn_string)
conn = mysql.connector.connect(
host=host, database=database, user=user, password=password)
cursor = conn.cursor()
print('opened database successfully')
print("drop table if exists %s;" % (tbl_name))
print("create table %s (%s);" % (tbl_name, col_str))
# drop table with same name
cursor.execute("drop table if exists %s;" % (tbl_name))
# create table
cursor.execute("create table %s (%s);" % (tbl_name, col_str))
print('{0} was created successfully'.format(tbl_name))
# save df to csv
dataframe.to_csv(file, header=dataframe_columns,
index=False, encoding='utf-8')
col_names = col_str.replace(
' varchar(100)', '').replace(' int', '').replace(' float', '')
# upload to db
SQL_STATEMENT = """
LOAD DATA INFILE '%s' INTO TABLE %s
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
(%s);
""" % (os.getcwd().replace(os.sep, '/') + '/' + file, tbl_name, col_names)
print(SQL_STATEMENT)
cursor.execute(SQL_STATEMENT)
print('file copied to db')
cursor.execute("grant select on table %s to public" % tbl_name)
conn.commit()
cursor.close()
print('table {0} imported to db completed'.format(tbl_name))
return
和我的 Jupyter Notebook (main.ipynb
):
import os
import numpy as np
import pandas as pd
import mysql.connector
#main
from csv_import_functions import *
#settings
dataset_dir = 'datasets'
#db settings
host = 'localhost'
database = 'nba_data'
user = 'user'
password = 'password'
#configure environment and create main df
csv_files = csv_files()
df = create_df( csv_files)
for k in csv_files:
#call dataframe
dataframe = df[k]
#clean table name
tbl_name = clean_tbl_name(k)
#clean column names
col_str, dataframe.columns = clean_colname(dataframe)
#upload data to db
upload_to_db(host,
database,
user,
password,
tbl_name,
col_str,
file=k,
dataframe=dataframe,
dataframe_columns=dataframe.columns)
最后,这是之前的输出挂起:
nba-playbyplay.csv
string is: host=localhost, database=nba_data, user=user, password=password
opened database successfully
drop table if exists nba_playbyplay;
create table nba_playbyplay (url varchar(100), gametype varchar(100), location varchar(100), date varchar(100), time varchar(100), winningteam varchar(100), quarter int, secleft int, awayteam varchar(100), awayplay varchar(100), awayscore int, hometeam varchar(100), homeplay float, homescore int, shooter float, shottype float, shotoutcome float, shotdist float, assister float, blocker float, foultype float, fouler float, fouled float, rebounder float, reboundtype float, violationplayer float, violationtype float, timeoutteam float, freethrowshooter float, freethrowoutcome float, freethrownum float, entergame float, leavegame float, turnoverplayer float, turnovertype float, turnovercause float, turnovercauser float, jumpballawayplayer varchar(100), jumpballhomeplayer varchar(100), jumpballposs varchar(100));
I'm trying to automate csv insertion into a mysql database. I go through creating the database and tables in a Python script and then run it in a Jupyter Notebook. However, for some reason the final cursor.execute(SQL_STATEMENT)
seems to hang and I am not able to insert the csv values into the database. I get no logs suggesting why this is the case:
This is my csv_import_functions.py
:
import os
import numpy as np
import pandas as pd
import mysql.connector
def csv_files():
# get names of only csv files
csv_files = []
for file in os.listdir(os.getcwd()):
if file.endswith(".csv"):
csv_files.append(file)
return csv_files
def create_df(csv_files):
data_path = os.getcwd()+'/'
# loop through the files and create the dataframe
df = {}
for file in csv_files:
try:
df[file] = pd.read_csv(data_path+file)
except UnicodeDecodeError:
# if utf-8 encoding error
df[file] = pd.read_csv(data_path+file, encoding="ISO-8859-1")
print(file)
return df
def clean_tbl_name(filename):
# rename csv, force lower case, no spaces, no dashes
clean_tbl_name = filename.lower().replace(" ", "").replace(
"-", "_").replace(r"/", "_").replace("\\", "_").replace("quot;, "").replace("%", "")
tbl_name = '{0}'.format(clean_tbl_name.split('.')[0])
return tbl_name
def clean_colname(dataframe):
# force column names to be lower case, no spaces, no dashes
dataframe.columns = [x.lower().replace(" ", "_").replace("-", "_").replace(r"/", "_").replace(
"\\", "_").replace(".", "_").replace("quot;, "").replace("%", "") for x in dataframe.columns]
# processing data
replacements = {
'timedelta64[ns]': 'varchar(100)',
'object': 'varchar(100)',
'float64': 'float',
'int64': 'int',
'datetime64': 'timestamp'
}
col_str = ", ".join("{} {}".format(n, d) for (n, d) in zip(
dataframe.columns, dataframe.dtypes.replace(replacements)))
return col_str, dataframe.columns
def upload_to_db(host, database, user, password, tbl_name, col_str, file, dataframe, dataframe_columns):
conn_string = "host=%s, database=%s, user=%s, password=%s, port=%s" % (
host, database, user, password)
print("string is: " + conn_string)
conn = mysql.connector.connect(
host=host, database=database, user=user, password=password)
cursor = conn.cursor()
print('opened database successfully')
print("drop table if exists %s;" % (tbl_name))
print("create table %s (%s);" % (tbl_name, col_str))
# drop table with same name
cursor.execute("drop table if exists %s;" % (tbl_name))
# create table
cursor.execute("create table %s (%s);" % (tbl_name, col_str))
print('{0} was created successfully'.format(tbl_name))
# save df to csv
dataframe.to_csv(file, header=dataframe_columns,
index=False, encoding='utf-8')
col_names = col_str.replace(
' varchar(100)', '').replace(' int', '').replace(' float', '')
# upload to db
SQL_STATEMENT = """
LOAD DATA INFILE '%s' INTO TABLE %s
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
(%s);
""" % (os.getcwd().replace(os.sep, '/') + '/' + file, tbl_name, col_names)
print(SQL_STATEMENT)
cursor.execute(SQL_STATEMENT)
print('file copied to db')
cursor.execute("grant select on table %s to public" % tbl_name)
conn.commit()
cursor.close()
print('table {0} imported to db completed'.format(tbl_name))
return
And my Jupyter Notebook (main.ipynb
):
import os
import numpy as np
import pandas as pd
import mysql.connector
#main
from csv_import_functions import *
#settings
dataset_dir = 'datasets'
#db settings
host = 'localhost'
database = 'nba_data'
user = 'user'
password = 'password'
#configure environment and create main df
csv_files = csv_files()
df = create_df( csv_files)
for k in csv_files:
#call dataframe
dataframe = df[k]
#clean table name
tbl_name = clean_tbl_name(k)
#clean column names
col_str, dataframe.columns = clean_colname(dataframe)
#upload data to db
upload_to_db(host,
database,
user,
password,
tbl_name,
col_str,
file=k,
dataframe=dataframe,
dataframe_columns=dataframe.columns)
Finally, here is the output before it hangs:
nba-playbyplay.csv
string is: host=localhost, database=nba_data, user=user, password=password
opened database successfully
drop table if exists nba_playbyplay;
create table nba_playbyplay (url varchar(100), gametype varchar(100), location varchar(100), date varchar(100), time varchar(100), winningteam varchar(100), quarter int, secleft int, awayteam varchar(100), awayplay varchar(100), awayscore int, hometeam varchar(100), homeplay float, homescore int, shooter float, shottype float, shotoutcome float, shotdist float, assister float, blocker float, foultype float, fouler float, fouled float, rebounder float, reboundtype float, violationplayer float, violationtype float, timeoutteam float, freethrowshooter float, freethrowoutcome float, freethrownum float, entergame float, leavegame float, turnoverplayer float, turnovertype float, turnovercause float, turnovercauser float, jumpballawayplayer varchar(100), jumpballhomeplayer varchar(100), jumpballposs varchar(100));
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论