常用的读写Excel的库:
使用它们都能够达到读写Excel的目的,但它们的侧重点又略有不同。
具体如下:
类型 | xlrd/xlwt/xlutils | openpyxl | pandas |
---|---|---|---|
读取/写入/修改 | √ | √ | √ |
xls | √ | × | √ |
xlsx | 高版本支持读 不支持写 | √ | √ |
大文件 | × | √ | √ |
效率 | 快 | 中 | 慢 |
功能 | 较弱 | 一般 | 强大 |
耗时 | 0.35s | 0.47s | 2.6s |
推荐使用xlrd/xlwt
和pandas
利用xlrd和xlwt进行excel读写,这里只能是xls类型excel
pip install xlrd
pip install xlwt
import xlrd
book = xlrd.open_workbook('data.xlsx')
sheet1 = book.sheets()[0]
nrows = sheet1.nrows
print('表格总行数',nrows)
ncols = sheet1.ncols
print('表格总列数',ncols)
# 行的值
row3_values = sheet1.row_values(2)
print('第3行值',row3_values)
# 列的值
col3_values = sheet1.col_values(2)
print('第3列值',col3_values)
# 单元格的值
cell_1_3_1 = sheet1.cell(0,2).value
print('第1行第3列的单元格的值:',cell_1_3_1)
# 或者
cell_1_3_2 = sheet1.row_values(0)[2]
print('第1行第3列的单元格的值:',cell_1_3_2)
# 或者
cell_1_3_3 = sheet1.col_values(2)[0]
print('第1行第3列的单元格的值:',cell_1_3_3)
import xlwt
import datetime
# 创建一个workbook 设置编码
workbook = xlwt.Workbook(encoding='utf-8')
# 创建一个worksheet
worksheet = workbook.add_sheet('Worksheet')
# 写入excel参数对应 行, 列, 值
worksheet.write(0, 0, label='测试')
# 设置单元格宽度
worksheet.col(0).width = 3333
# 设置单元格高度
tall_style = xlwt.easyxf('font:height 520;')
worksheet.row(0).set_style(tall_style)
# 设置对齐方式
alignment = xlwt.Alignment() # Create Alignment
# May be: HORZ_GENERAL, HORZ_LEFT, HORZ_CENTER, HORZ_RIGHT, HORZ_FILLED, HORZ_JUSTIFIED, HORZ_CENTER_ACROSS_SEL, HORZ_DISTRIBUTED
alignment.horz = xlwt.Alignment.HORZ_CENTER
# May be: VERT_TOP, VERT_CENTER, VERT_BOTTOM, VERT_JUSTIFIED, VERT_DISTRIBUTED
alignment.vert = xlwt.Alignment.VERT_CENTER
style = xlwt.XFStyle() # Create Style
style.alignment = alignment # Add Alignment to Style
worksheet.write(2, 0, '居中', style)
# 写入带颜色背景的数据
pattern = xlwt.Pattern() # Create the Pattern
# May be: NO_PATTERN, SOLID_PATTERN, or 0x00 through 0x12
pattern.pattern = xlwt.Pattern.SOLID_PATTERN
pattern.pattern_fore_colour = 5 # May be: 8 through 63. 0 = Black, 1 = White, 2 = Red, 3 = Green, 4 = Blue, 5 = Yellow, 6 = Magenta, 7 = Cyan, 16 = Maroon, 17 = Dark Green, 18 = Dark Blue, 19 = Dark Yellow , almost brown), 20 = Dark Magenta, 21 = Teal, 22 = Light Gray, 23 = Dark Gray, the list goes on...
style = xlwt.XFStyle() # Create the Pattern
style.pattern = pattern # Add Pattern to Style
worksheet.write(0, 1, '颜色', style)
# 写入日期
style = xlwt.XFStyle()
# Other options: D-MMM-YY, D-MMM, MMM-YY, h:mm, h:mm:ss, h:mm, h:mm:ss, M/D/YY h:mm, mm:ss, [h]:mm:ss, mm:ss.0
style.num_format_str = 'M/D/YY'
worksheet.write(0, 2, datetime.datetime.now(), style)
# 写入公式
worksheet.write(0, 3, 5) # Outputs 5
worksheet.write(0, 4, 2) # Outputs 2
# Should output "10" (A1[5] * A2[2])
worksheet.write(1, 3, xlwt.Formula('D1*E1'))
# Should output "7" (A1[5] + A2[2])
worksheet.write(1, 4, xlwt.Formula('SUM(D1,E1)'))
# 写入超链接
worksheet.write(1, 0, xlwt.Formula('HYPERLINK("http://www.baidu.com";"百度一下")'))
# 保存
workbook.save('Excel_test.xls')
import xlrd
import json
import os
def excel_reader(filename, jsonname):
try:
# 打开刚才写入的test.xls文件
wb = xlrd.open_workbook(filename, on_demand=True)
userlist = []
# 遍历所有表单内容
for sh in wb.sheets():
classname = sh.name
# 列数
ncols = sh.ncols
for r in range(2, sh.nrows):
user = {}
user["level"] = sh.row_values(r)[1]
user["grade"] = sh.row_values(r)[2]
user["class"] = sh.row_values(r)[3]
user["name"] = sh.row_values(r)[4]
user["loginname"] = sh.row_values(r)[5]
user["loginpwd"] = "123456"
if (ncols >= 7):
user["sex"] = sh.row_values(r)[7]
userlist.append(user)
content = json.dumps(userlist, ensure_ascii=False)
writeFile(os.path.join(mpath, jsonname), content)
print(userlist)
except:
print("转换失败:" + filename)
def writeFile(filepath, content):
with open(filepath, 'w', encoding='utf-8') as f:
f.write(content)
f.close()
if __name__ == '__main__':
mpath = os.getcwd()
excel_reader(os.path.join(mpath, '导入表格', 'import.xls'), "export.json")
with open(filepath, 'w', encoding='utf-8') as f:
f.write(content)
f.close()
第二个参数可选
w
没有创建 ,有则覆盖a
没有创建,有则追加content = json.dumps(userlist, ensure_ascii=False)
默认ensure_ascii
为True
,中文会被编码
#-- coding: utf-8 --
import pandas as pd
file_path = r'./demo.xls'
df = pd.read_excel(file_path, sheet_name = "Sheet1") # sheet_name不指定时默认返回全表数据
# 打印表数据,如果数据太多,会略去中间部分
print(df)
# 打印头部数据,仅查看数据示例时常用
print(df.head())
# 打印列标题
print(df.columns)
# 打印行
print(df.index)
# 打印指定列
print(df["name"])
# 描述数据
print(df.describe())
from pandas import DataFrame
data = { 'name': ['zs', 'ls', 'ww'], 'age': [11, 12, 13], 'gender': ['man', 'man', 'woman']}
df = DataFrame(data)
df.to_excel('new.xlsx')
#-- coding: utf-8 --
import pandas as pd
from pandas import DataFrame
file_path = r'./demo.xls'
df = pd.read_excel(file_path)
df['gender'][df['gender'] == 'girl'] = 'female'
df['gender'][df['gender'] == 'boy'] = 'male'
print(df)
DataFrame(df).to_excel(file_path, sheet_name='Sheet1', index=False, header=True)
# 新增一行
df.loc[6] = [5, 'Eric', 'male', 20, '2021-5-18']
# 新增一列
df['favorite'] = None
# 写入数据文件
DataFrame(df).to_excel(file_path, sheet_name='Sheet1', index=False, header=True)