前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >原 高效读取Excel

原 高效读取Excel

作者头像
魂祭心
发布2018-05-17 16:02:53
7960
发布2018-05-17 16:02:53
举报
文章被收录于专栏:魂祭心

原理 直接解析excel源文件,仅仅读取关心的数据,样式,公式,合并等等统统丢弃。

代码语言:javascript
复制
public class Option
{
    /// <summary>
    /// 开始行
    /// </summary>
    public int StartRow { get; set; }
    /// <summary>
    /// 结束行
    /// </summary>
    public int EndRow { get; set; }
    public Option()
    {
        StartRow = 0;
        EndRow = int.MaxValue;
    }
}
public class ExcelDataParser
{
    #region Field
    private Option option;
    #endregion

    #region Constructor
    public ExcelDataParser(Option option)
    {
        this.option = option;
    }
    #endregion

    #region Public Method
    /// <summary>
    /// 解析sheet数据
    /// </summary>
    /// <param name="xmlPath"></param>
    /// <returns></returns>
    public DataSet ParserSheets(string xmlPath)
    {
        DataSet ds = new DataSet();
        using (ZipArchive zip = ZipFile.OpenRead(xmlPath))
        {
            var zipEntry = zip.GetEntry(@"xl/sharedStrings.xml");
            var map = new Dictionary<int, string>();
            XmlTextReader readerXml = null;
            if (zipEntry != null)
            {
                readerXml = new XmlTextReader(zipEntry.Open());
                map = GetShareString(readerXml);
            }


            zipEntry = zip.GetEntry(@"xl/workbook.xml");
            readerXml = new XmlTextReader(zipEntry.Open());
            var sheetNames = GetSheetName(readerXml);
            foreach (var sheetName in sheetNames)
            {
                zipEntry = zip.GetEntry(@"xl/worksheets/sheet" + sheetName.Key + ".xml");
                readerXml = new XmlTextReader(zipEntry.Open());
                var table = ParserSheetData(readerXml, map);
                table.TableName = sheetName.Value;
                ds.Tables.Add(table);
            }
        }
        return ds;
    }
    #endregion

    #region Private Method
    /// <summary>
    /// 解析数据
    /// </summary>
    /// <param name="readerXml"></param>
    /// <param name="strMap"></param>
    /// <returns></returns>
    DataTable ParserSheetData(XmlTextReader readerXml, Dictionary<int, string> strMap)
    {
        DataTable dt = new DataTable();
        int row = -1;
        int column = 0;
        DataRow dataRow = null;
        while (readerXml.Read())
        {
            if (row > option.EndRow) break;
            if (readerXml.NodeType == XmlNodeType.Element)
            {
                if (readerXml.Name == "row")
                {
                    row++;
                    column = 0;
                    if (dt.Columns.Count > 0)
                    {
                        dataRow = dt.NewRow();
                        dt.Rows.Add(dataRow);
                        for (; row <= option.StartRow; row++)
                        {
                            readerXml.ReadToNextSibling("row");
                        }
                    }
                }
                else if (readerXml.Name == "v")
                {
                    string data = "";
                    if (string.IsNullOrEmpty(readerXml.GetAttribute("t")))
                    {
                        data = readerXml.ReadInnerXml();
                    }
                    else
                    {
                        var strIndex = int.Parse(readerXml.ReadInnerXml());
                        data = strMap.ContainsKey(strIndex) ? strMap[strIndex] : strIndex.ToString();
                    }

                    if (row == 0)
                    {
                        dt.Columns.Add(new DataColumn(data));
                    }
                    else
                    {
                        dataRow[column] = data;
                    }
                    column++;
                }
                else if (readerXml.Name == "t")
                {//直接取值
                    var data = readerXml.ReadInnerXml();
                    if (row == 0)
                    {
                        dt.Columns.Add(new DataColumn(data));
                    }
                    else
                    {
                        dataRow[column] = data;
                    }
                    column++;
                }
            }
        }
        return dt;
    }

    /// <summary>
    /// 字符串映射的map
    /// </summary>
    /// <param name="readerXml"></param>
    /// <returns></returns>
    Dictionary<int, string> GetShareString(XmlTextReader readerXml)
    {
        var index = 0;
        Dictionary<int, string> strMap = new Dictionary<int, string>();
        while (readerXml.Read())
        {
            if (readerXml.NodeType == XmlNodeType.Element)
            {
                if (readerXml.Name == "t")
                {
                    strMap.Add(index, readerXml.ReadInnerXml());
                    index++;
                }
            }
        }
        return strMap;
    }

    /// <summary>
    /// 获取sheet序号和名称的映射
    /// </summary>
    /// <param name="readerXml"></param>
    /// <returns></returns>
    Dictionary<int, string> GetSheetName(XmlTextReader readerXml)
    {
        Dictionary<int, string> sheetMap = new Dictionary<int, string>();
        while (readerXml.Read())
        {
            if (readerXml.NodeType == XmlNodeType.Element)
            {
                if (readerXml.Name == "sheet")
                {
                    var id = readerXml.GetAttribute("sheetId");
                    var name = readerXml.GetAttribute("name");
                    sheetMap.Add(int.Parse(id), name);
                }
            }
        }
        return sheetMap;
    }

    #endregion
}

测试结果 excel规模:100w*24 读取进DataTable 测试结果:时间约1分半,内存峰值1.6g

输入图片说明
输入图片说明
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档