前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >(新手)使用pandas操作EXCEL

(新手)使用pandas操作EXCEL

作者头像
py3study
发布2020-01-19 17:23:07
1.1K0
发布2020-01-19 17:23:07
举报
文章被收录于专栏:python3python3
代码语言:javascript
复制
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
#path = r'C:\Users\tsl\Desktop\数据.xlsx'
#一列数据前面添加字符串
def add_C(village_data):
    village_data['电话'] = ['C%s' % y for y in village_data['电话']]
    return village_data['电话']
#读取excel
data = pd.read_excel(r'C:\Users\tsl\Desktop\数据.xlsx')
#判定某列中是否有null,如果有删除null 行
if data['电话'].isnull().any():
#将excel里面空值修改
    data['电话'] = data['电话'].fillna('999')
#得到999值的索引室号
    data_index = data[data.电话=='999'].index.tolist()
#删除
    data = data.drop(data_index)
#以街道分组
group_by_name = data.groupby('街道')
#需要groups得出具体结果
for i in group_by_name.groups:
    village_data = data.loc[data['街道'] == i ]
    add_C(village_data)
  #以楼栋分组
    group_by_name_build = village_data.groupby('楼栋')
    for build_name in group_by_name_build.groups:
        build_data = village_data.loc[village_data['楼栋'] == build_name]
        build_data = build_data.drop_duplicates(subset='室号', keep='first')
      #更改列的数据类型
        build_data['室号'] = build_data['室号'].astype(np.str)
        row_index = 0
        for house_num in build_data['室号']:
            if '-' in house_num:
                house_num = house_num.replace('-','9') #并没有修改原内存地址的值,固需要赋值给原有内存地址
            #选区某行某列并赋值
                build_data.iat[row_index,2] = house_num
            row_index +=1
        del build_data['街道']
        del build_data['楼栋']
        #修改列名
        build_data.columns = ['用户编码','指定开门','电话号码']
        build_data['指定开门'] = '双门'
        build_data_row_index = 0
        for modify_num in build_data['用户编码']:
            if len(modify_num) < 4:
                modify_num = '0' + modify_num   #并没有修改原内存地址的值,固需要赋值给原有内存地址
                build_data.iat[build_data_row_index,0] = modify_num
            build_data_row_index +=1
#保存/生成新的excle
        DataFrame(build_data).to_excel('C:\\Users\\tsl\Desktop\info\%s.xls' % (build_name),sheet_name='Sheet1',index=False,header=True )

##优化后
代码语言:javascript
复制
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
#读取excel
#path = r'C:\Users\tsl\Desktop\数据.xlsx'
def Build_data(build_data):
    #一个有索引和行内容的迭代器
    for data_line_index,data_line in build_data.iterrows():
            if '-' in data_line['用户编码']:
                data_line['用户编码'] = data_line['用户编码'].replace('-','9')
                # house_num = house
            if len(data_line['用户编码'])<4:
                data_line['用户编码'] = '0'+ data_line['用户编码']
            data_line['电话号码'] = data_line['电话号码'][:-2]
    return build_data
def add_C(village_data):
    village_data['电话'] = ['C%s' % y for y in village_data['电话']]
    return village_data['电话']
data = pd.read_excel(r'C:\Users\tsl\Desktop\数据.xlsx')
data_index = 0
#判定列中是否有null,如果有删除null 行
if data['电话'].isnull().any():
#将excel里面空值修改
    data['电话'] = data['电话'].fillna('999')
#得到999值的索引室号
    data_index_1 = data[data.电话=='999'].index.tolist()
#删除
    data = data.drop(data_index_1)
#已街道分组
group_by_name = data.groupby('街道')
#需要groups得出具体结果
for i in group_by_name.groups:
    village_data = data.loc[data['街道'] == i ]
    add_C(village_data)
    group_by_name_build = village_data.groupby('楼栋')
    for build_name in group_by_name_build.groups:
        build_data = village_data.loc[village_data['楼栋'] == build_name]
        build_data = build_data.drop_duplicates(subset='室号', keep='first')
        del build_data['街道']
        del build_data['楼栋']
        #修改列名
        build_data.columns = ['用户编码','指定开门','电话号码']
        build_data['指定开门'] = '双门'
        build_data  = build_data.astype(np.str)
        build_data = Build_data(build_data)
        DataFrame(build_data).to_excel('C:\\Users\\tsl\Desktop\info\%s.xls' % (build_name),sheet_name='Sheet1',index=False,header=True )
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2019-05-02 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档