导入CSV文件到MySQL
一.使用命令行进行导入
LOAD DATA LOCAL INFILE '/data/home/narieliu/titanic.csv'
INTO TABLE tb1
FIELDS TERMINATED BY ','
ENCLOSEDBY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
加local是在客户端导入,不加是在服务端导入
客户端因为权限无法导入的可以使用脚本进行导入,但是相比较效率会下降
二.使用python脚本进行导入
使用时,可根据需求进行相应的修改,需要环境python3,以及安装pymysql库
import csv
import pymysql# 数据库连接参数
db_config = {'host': 'your_cloud_db_host','user': 'your_username','password': 'your_password','database': 'your_database','charset': 'utf8mb4','cursorclass': pymysql.cursors.DictCursor
}# CSV 文件路径
csv_file_path = 'CSV文件路径'# 插入语句,字段名和占位符要对应
insert_sql = """
INSERT INTO tb1 (PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)
"""def main():connection = pymysql.connect(**db_config)try:with connection.cursor() as cursor, open(csv_file_path, 'r', encoding='utf-8') as f:reader = csv.reader(f)header = next(reader) # 跳过表头batch_size = 1000batch = []for row in reader:# 处理空值,转换数据类型等(根据需要调整)# 这里简单示范,空字符串转 Nonerow = [None if x == '' else x for x in row]# 转换数值字段类型(示例)# csv中默认为字符串,根据需要进行转换row[0] = int(row[0]) if row[0] is not None else None row[1] = int(row[1]) if row[1] is not None else None row[2] = int(row[2]) if row[2] is not None else None # Name, Sex 保持字符串row[5] = float(row[5]) if row[5] is not None else None row[6] = int(row[6]) if row[6] is not None else None row[7] = int(row[7]) if row[7] is not None else None # Ticket 字符串row[9] = float(row[9]) if row[9] is not None else None # Cabin, Embarked 字符串batch.append(row)if len(batch) >= batch_size:cursor.executemany(insert_sql, batch)connection.commit()batch.clear()# 插入剩余数据if batch:cursor.executemany(insert_sql, batch)connection.commit()print("数据导入完成!")except Exception as e:print("导入出错:", e)finally:connection.close()if __name__ == '__main__':main()