iTesting,爱测试,爱分享
在做自动化过程中,难免会跟Excel打交道,以前我们读写excel大都用xlrd, xlwt, 但是现在有了更好用的方式 --pandas, 我用了下感觉效果不错,索性写了读和写的一个小例子,希望能帮助到大家。
0.什么是pandas:
pandas 是基于NumPy 的一种工具,该工具是为了解决数据分析任务而创建的。Pandas 纳入了大量库和一些标准的数据模型,提供了高效地操作大型数据集所需的工具。pandas提供了大量能使我们快速便捷地处理数据的函数和方法。你很快就会发现,它是使Python成为强大而高效的数据分析环境的重要因素之一
1. 安装:
pip install pandas
2.Excel 读写实践:
import os
import pandas as pd
import xlsxwriter
from openpyxl import load_workbook
class ExcelFileHelper:
def __init__(self, file_name=None):
if not file_name:
self.file_name = os.path.join(os.path.dirname(__file__), 'my_excel.xlsx')
if not os.path.exists(self.file_name):
with xlsxwriter.Workbook(self.file_name) as f:
pass
print("File '{}' are created".format(self.file_name))
else:
self.file_name = file_name
self.xls = pd.ExcelFile(self.file_name)
self.writer = pd.ExcelWriter(self.file_name, engine='xlsxwriter')
self.openpyxl_writer = pd.ExcelWriter(self.file_name, engine='openpyxl')
def get_excel_sheets(self):
return self.xls.sheet_names
def read_excel_sheet(self, sheet_name): # sheet_name = None, means read whole excel 0 means read first sheet
return_list = []
df = pd.read_excel(self.xls, sheet_name)
# get all columns
# print(df.columns)
# get all columns name
# print(df.columns.values)
# get columns size
# print(df.columns.size)
# get rows size
# print(df.iloc[:, 0].size)
# get the row of one specific value
# print(df[df['ID'].isin([2])])
# get every columns header
for item in df:
temp_dict = dict()
temp_dict.setdefault(item, df[item].values)
return_list.append(temp_dict)
# get values for one column
# for i in df.index:
# print(df['ID'][i])
# put all of the columns in to a list and parse as you wish
# id = df['ID']
# test_name = df['TestName']
# results = df['Results']
return return_list
def write_excel_sheet(self, sheet_name, write_value_dict):
# Create a Pandas dataframe from the data.
df = pd.DataFrame(write_value_dict)
book = load_workbook(self.file_name)
self.openpyxl_writer.book = book
df.to_excel(self.openpyxl_writer, sheet_name)
self.openpyxl_writer.save()
self.openpyxl_writer.close()
def add_cloumns_values_to_sheet(self, sheet_name, column_name, column_value_list=None):
df = pd.read_excel(self.xls, sheet_name)
raw_size = df.iloc[:, 0].size
if column_value_list:
while len(column_value_list) < raw_size:
column_value_list.append('')
while len(column_value_list) > raw_size:
column_value_list = column_value_list[:raw_size]
print("Your column values truncated due to exceed the max length of existing columns ")
df[column_name] = column_value_list
df.to_excel(self.file_name)
def update_column_name(self, sheet_name, old_column_name, new_column_name):
df = pd.read_excel(self.xls, sheet_name)
df.columns = map((lambda x : new_column_name if x ==old_column_name else x), df.columns)
df.to_excel(self.file_name)
def add_row_values(self, sheet_name, row_value_dict, ignore_index=True):
df = pd.read_excel(self.xls, sheet_name)
df = df.append(row_value_dict, ignore_index)
book = load_workbook(self.file_name)
self.openpyxl_writer.book = book
df.to_excel(self.openpyxl_writer, sheet_name)
def update_column_values(self, sheet_name, column_name, old_value, new_value):
df = pd.read_excel(self.xls, sheet_name)
df[column_name].replace(old_value, new_value, inplace=True)
df.to_excel(self.file_name)
if __name__ == "__main__":
exl = ExcelFileHelper()
for item in exl.read_excel_sheet('Sheet1'):
print(item)
看看这个例子,跟以前读写Excel比起来,是不是优雅许多?
Python有很多优秀的第三方库等待着我们去发现,如果你们有比较好的实践,也可以告诉蔡老师 :)