Oracle 快速卸载数据到文本文件

用户1148526

发布于 2019-05-25 19:47:44

1.1K0

发布于 2019-05-25 19:47:44

文章被收录于专栏：Hadoop数据仓库

一、需求

有个需求要从oracle表里导出数据，存成csv文本文件。数据量有4亿多行、25g。最普通的解决方案是在sql*plus使用spool。尽管该方案在某些情况下可行，但它的速度太慢，输出大约每秒1m字节，全部导出需要7个多小时，这是不可接受的，需要快速导出数据。

二、解决方案

下面的核心代码出自adrian billington。自定义函数使用utl_file包输出数据，并且使用pipeline函数并行输出。使用这种方案的好处是:

它是很简单的sql，无需大量的sql*plus命令，不用指定行尺寸或on/off切换
因为它是sql，所以可以从几乎任何地方执行它，甚至可以插入到pl/sql里
它既有sql执行结果的内部日志，也有可选的外部unix日志文件
它很快，如果使用并行，可以到达很高的速度（我的并行度设置为10，可以达到每秒50m）

三、实现代码

-- 建立目录  
create or replace directory "mydir" as '/home/oracle/';  
  
-- 建立对象  
create or replace type dump_ot as object  
(  
   file_name varchar2 (128),  
   directory_name varchar2 (128),  
   no_records number,  
   session_id number,  
   start_dttm timestamp with time zone,  
   end_dttm timestamp with time zone  
);  
  
-- 建立对象表  
create or replace type dump_ntt as table of dump_ot;  
  
-- 建立函数  
create or replace function data_unload (  
   p_source            in sys_refcursor,  
   p_filename          in varchar2,  
   p_directory         in varchar2,  
   p_unique_filename   in varchar2 default 'n',  
   p_create_log_file   in varchar2 default 'n')  
   return dump_ntt  
   pipelined  
   parallel_enable(partition p_source by any)  
as  
   /*  
   p_source           sql query you spool  
   p_filename         targe file name  
   p_directory        targe oracle directory  
   p_unique_filename  create unique file name? y/n (appends unique sid to p_filename) - useful only for parallel unload  
   p_create_log_file  create log file? y/n (creates separate log file and logs every 1mm rows) - has very small performance hit on the spool  
   */  
  
   type row_ntt is table of varchar2 (32767);  
  
   v_rows                 row_ntt;  
   v_file                 utl_file.file_type;  
   v_log_file             utl_file.file_type;  
   v_buffer               varchar2 (32767);  
   v_sid                  varchar (255);  
   v_name                 varchar2 (255);  
   v_lines                pls_integer := 0;  
   v_start_dttm           timestamp with time zone := systimestamp;  
   v_end_dttm             timestamp with time zone;  
   v_create_log           boolean := false;  
  
   c_eol         constant varchar2 (1) := chr (10);  
   c_eollen      constant pls_integer := lengthb (c_eol);  
   c_maxline     constant pls_integer := 32767;  
   c_log_limit   constant pls_integer := 1000000;  
begin  
   v_sid := lpad (sys_context ('userenv', 'sid'), 10, '0');  
   v_name := p_filename;  
  
   if trim (upper (p_create_log_file)) = 'y'  
   then  
      v_create_log := true;  
   end if;  
  
   -- add sid (must be used for parallel spooling, single spooling or spooling across db-link creates alsway one file)  
   if upper (p_unique_filename) = 'y'  
   then  
      v_name := v_name || '_' || to_char (v_sid);  
   end if;  
  
   v_file :=  
      utl_file.fopen (p_directory,  
                      v_name,  
                      'w',  
                      c_maxline);  
  
   if v_create_log  
   then  
      v_log_file :=  
         utl_file.fopen (p_directory,  
                         v_name || '.log',  
                         'w',  
                         c_maxline);  
      utl_file.put_line (  
         v_log_file,  
         to_char (v_start_dttm, 'yyyy-mm-dd hh24:mi:ss:ff3') || ' --> start');  
      utl_file.fflush (v_log_file);  
   end if;  
  
   loop  
      fetch p_source  
         bulk collect into v_rows  
         limit 10000;  
  
      for i in 1 .. v_rows.count  
      loop  
         if lengthb (v_buffer) + c_eollen + lengthb (v_rows (i)) <= c_maxline  
         then  
            v_buffer := v_buffer || c_eol || v_rows (i);  
         else  
            if v_buffer is not null  
            then  
               utl_file.put_line (v_file, v_buffer);  
            end if;  
  
            v_buffer := v_rows (i);  
         end if;  
      end loop;  
  
      v_lines := v_lines + v_rows.count;  
  
      if v_create_log and mod (v_lines, c_log_limit) = 0  
      then  
         utl_file.put_line (  
            v_log_file,  
               to_char (systimestamp, 'yyyy-mm-dd hh24:mi:ss:ff3')  
            || ' --> '  
            || v_lines);  
         utl_file.fflush (v_log_file);  
      end if;  
  
      exit when p_source%notfound;  
   end loop;  
  
   close p_source;  
  
   utl_file.put_line (v_file, v_buffer);  
   utl_file.fclose (v_file);  
   v_end_dttm := systimestamp;  
  
   if v_create_log  
   then  
      utl_file.put_line (  
         v_log_file,  
         to_char (v_end_dttm, 'yyyy-mm-dd hh24:mi:ss:ff3') || ' --> ' || v_lines);  
      utl_file.put_line (  
         v_log_file,  
         to_char (v_end_dttm, 'yyyy-mm-dd hh24:mi:ss:ff3') || ' --> end');  
      utl_file.fclose (v_log_file);  
   end if;  
  
   pipe row (dump_ot (v_name,  
                      p_directory,  
                      v_lines,  
                      v_sid,  
                      v_start_dttm,  
                      v_end_dttm));  
   return;  
end;  
/  
  
-- 显示执行时间  
set timing on;  
  
-- 调用函数，并行生成10个csv文件  
select *  
  from table (  
          data_unload (  
             cursor (  
                select /*+ parallel(u,10) */  
                      log_id  
                       || ','  
                       || typeid  
                       || ','  
                       || to_id  
                       || ','  
                       || to_msg  
                       || ','  
                       || userid  
                       || ','  
                       || nickname  
                       || ','  
                       || showing  
                       || ','  
                       || fromip  
                       || ','  
                       || to_char (createtime, 'yyyy-mm-dd hh24:mi:ss')  
                  from u1.user_visit u),  
             'a.txt',  
             'mydir',  
             'y',  
             'n'));

查询输出：

a.txt_0000000677 mydir40583785

67714-aug-15 08.53.20.648485 am +08:00

14-aug-15 09.01.00.522882 am +08:00

a.txt_0000001407 mydir40156213

140714-aug-15 08.53.20.648505 am +08:00

14-aug-15 09.01.02.291799 am +08:00

a.txt_0000000703 mydir40919430

70314-aug-15 08.53.20.648190 am +08:00

14-aug-15 09.01.02.304130 am +08:00

a.txt_0000001535 mydir40475246

153514-aug-15 08.53.20.648586 am +08:00

14-aug-15 09.01.02.322707 am +08:00

a.txt_0000000555 mydir40651496

55514-aug-15 08.53.20.648141 am +08:00

14-aug-15 09.01.03.079116 am +08:00

a.txt_0000001314 mydir40591175

131414-aug-15 08.53.20.648177 am +08:00

14-aug-15 09.01.03.333193 am +08:00

a.txt_0000000506 mydir41898539

50614-aug-15 08.53.20.648098 am +08:00

14-aug-15 09.01.05.103469 am +08:00

a.txt_0000001436 mydir41175847

143614-aug-15 08.53.20.648141 am +08:00

14-aug-15 09.01.08.538071 am +08:00

a.txt_0000000445 mydir41817006

44514-aug-15 08.53.20.648177 am +08:00

14-aug-15 09.01.11.453815 am +08:00

a.txt_0000000518 mydir42811066

51814-aug-15 08.53.20.648097 am +08:00

14-aug-15 09.01.16.936903 am +08:00

elapsed: 00:07:56.41

执行结果：

411079803行、25g数据导出成10个csv文本文件，用时7分56秒。

参考：

oracle fast parallel data unload into ascii file(s)

本文参与腾讯云自媒体同步曝光计划，分享自作者个人站点/博客。

原始发表：2016年12月27日，如有侵权请联系 cloudcommunity@tencent.com 删除

sql

oracle

文件存储

本文分享自作者个人站点/博客前往查看

如有侵权，请联系 cloudcommunity@tencent.com 删除。

本文参与腾讯云自媒体同步曝光计划，欢迎热爱写作的你一起参与！

sql

oracle

文件存储

登录后参与评论

0 条评论

热度

Oracle 快速卸载数据到文本文件

Oracle 快速卸载数据到文本文件

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐