2.1 使用gpadmin用户COPY数据到Greenplum中
2.2使用非gpadmin用户COPY数据到Greenplum中
$ cat source.txt
11111ddd22222
$ cat replaceScript.sh
#!/bin/sh
# 生成隐藏字符 soh=`echo 1 | awk '{printf("%c", $1)}'` replaceWord=$1
# 替换开始 sed -i 's/'$replaceWord'/'$soh'/g' $2
# 插入到greenplum数据库 cd /greenplum/soft/greenplum-db-5.8.0/bin ./psql -d ***** -h 192.168.***.*** -p 5432 -U gpadmin -c "COPY xiaoxu_test.xiaoxu_test FROM '/home/xiaoxu/gpload_test/$2' WITH csv DELIMITER E'\001' LOG ERRORS SEGMENT REJECT LIMIT 3000 ROWS";
标红的部分最好不用,这样可以确保数据的准确性。
E'\001' : 是ascii分隔符,详情请查看:https://blog.csdn.net/xfg0218/article/details/80901752
安装psql命令
#yum install postgresql -y
psql -d ***** -h 192.168.***.** -p 5432 -U gpadmin -c "\\COPY xiaoxu_test.xiaoxu_test FROM '/home/xiaoxu/gpload_test/$2' WITH csv DELIMITER E'\001' LOG ERRORS SEGMENT REJECT LIMIT 3000 ROWS";
注意\\的问题
$ sh psqlInsert.sh ddd source.txt COPY 1
ddd: 是分隔符
source.txt:带有分隔符的文件
http://postgresql.ru.net/manual/sql-copy.html
# vi copyToGreenplum.sh
#!bin/sh
# 获取为文件所在的位置 basepath=$(cd `dirname $0`; pwd)
# scheam名字 scheamname='xiaoxu_test'
# 表名字 tablename='enterprisebaseinfocollect_out_20180812'
# 数据库名字 gpdatabase='******'
# gp服务器ip gpip='192.168.*****.****'
#gp port gpport='5432'
# gp user gpuser='gpadmin'
# 需要插入的文件的名字 dirfilename='mv_enterprisebaseinfo_20180812.csv'
# 替换双引号与隐藏字符 # sh asciiReplaceScriptAll.sh
# gp copy 开始时间 gpinsert_time=`date +'%Y-%m-%d %H:%M:%S'`
echo -e "GP COPY 数据开始...."
# 执行插入操作 psql -d $gpdatabase -h $gpip -p $gpport -U $gpuser -c "COPY $scheamname.$tablename FROM '$basepath/$dirfilename' WITH csv DELIMITER E'\001'";
# gp copy 结束时间
gpinsert_endtime=`date +'%Y-%m-%d %H:%M:%S'` gpinsert_start_seconds=$(date --date="$gpinsert_time" +%s) gpinsert_ent_seconds=$(date --date="$gpinsert_endtime" +%s) time_of_use=$((end_seconds-start_seconds))
echo -e "\t $((time_of_use/3600)) h $((time_of_use%3600/60)) m $((time_of_use%3600%60)) s"
$ du -sh mv_enterprisebaseinfo_20180812.csv 130G mv_enterprisebaseinfo_20180812.csv
$ head -n 1 mv_enterprisebaseinfo_20180812.csv 310000645D0435BED45B39E0531ECDA8C0463F2018020413595200008546上海佐洽房地产经纪有限公司闵行第一分公司3101120018106392151K7032018-02-02 00:00:00200000150214137361900-01-01 00:00:002018-02-02 00:00:002018-02-02 00:00:0031011210房地产经纪,房地产信息咨询,商务信息咨询,设计、制作、代理、发布各类广告,市场信息咨询与调查(不得从事社调查、社会调研、民意调查、民意测验),物业管理,票务代理,会务服务,翻译服务,企业管理咨询,接受隶属企业委托从事相关业务,从事计算机科技领域内的技术开发、技术转让、技术咨询、技术务,办公用品、日用百货的销售。。【依法须经批准的项目,经相关部门批准后方可开展经营活动000000000001900-01-01 00:00:0002018-08-08 09:54:29.000000,E20180204,E20180205,E20180206,E20180207,E20180212,E20180718,E2015030410e635326b318d6881ca0214c0831e6e65P01MA1GBWMN991310112MA1GBWMN9972E3B618BF6B3365E0531ECDA8C05E49E20150304D9FC431BDDA2CF143DDB0A89F76036F7闵行区市场监督管理局1存续(在营、开业、在册)有限责任公司分公司自然人独资
$ sh copyToGreenplum.sh GP COPY 数据开始.... COPY 90700000 GP COPY 总耗时: 2 小时 32 分钟 8 秒
$ sh copyToGreenplum.sh GP COPY 数据开始.... o0COPY 90700000 GP COPY 总耗时: 2 小时 54 分钟 35 秒
$ sh copyToGreenplum.sh GP COPY 数据开始.... o0COPY 90700000 GP COPY 总耗时: 2 小时 46 分钟 35 秒
select count(*) from enterprisebaseinfocollect_out_20180812; 2,7210,0000 select pg_size_pretty(pg_relation_size('enterprisebaseinfocollect_out_20180812')) 137 GB
1 s_ext_nodenum varchar, 2 pripid varchar, 3 s_ext_sequence varchar, 4 entname varchar, 5 oriregno varchar, 6 regno varchar, 7 enttype varchar, 8 ppripid varchar, 9 pentname varchar, 10 pregno varchar, 11 hypotaxis varchar, 12 industryphy varchar, 13 industryco varchar, 14 abuitem varchar, 15 cbuitem varchar, 16 opfrom varchar, 17 opto varchar, 18 postalcode varchar, 19 tel varchar, 20 email varchar, 21 localadm varchar, 22 credlevel varchar, 23 assdate varchar, 24 esdate varchar, 25 apprdate varchar, 26 regorg varchar, 27 entcat varchar, 28 entstatus varchar, 29 regcap varchar, 30 opscope varchar, 31 opform varchar, 32 opscoandform varchar, 33 ptbusscope varchar, 34 domdistrict varchar, 35 dom varchar, 36 ecotecdevzone varchar, 37 domproright varchar, 38 oplocdistrict varchar, 39 oploc varchar, 40 reccap varchar, 41 insform varchar, 42 parnum varchar, 43 parform varchar, 44 exenum varchar, 45 empnum varchar, 46 sconform varchar, 47 forcapindcode varchar, 48 midpreindcode varchar, 49 protype varchar, 50 congro varchar, 51 congrocur varchar, 52 congrousd varchar, 53 regcapusd varchar, 54 regcapcur varchar, 55 regcaprmb varchar, 56 forregcapcur varchar, 57 forregcapusd varchar, 58 forreccapusd varchar, 59 worcap varchar, 60 chamecdate varchar, 61 opracttype varchar, 62 forentname varchar, 63 depincha varchar, 64 country varchar, 65 itemofoporcpro varchar, 66 conofcontrpro varchar, 67 fordom varchar, 68 forregecap varchar, 69 foropscope varchar, 70 s_ext_entproperty varchar, 71 s_ext_timestamp varchar, 72 s_ext_batch varchar, 73 s_ext_validflag varchar, 74 s_ext_induscat varchar, 75 s_ext_enttype varchar, 76 manacate varchar, 77 limparnum varchar, 78 foreignbodytype varchar, 79 entname_old varchar, 80 person_id varchar, 81 name varchar, 82 certype varchar, 83 ancheyear varchar, 84 candate varchar, 85 revdate varchar, 86 licid varchar, 87 credit_code varchar, 88 entid varchar, 89 jobid varchar, 90 tax_code varchar, 91 zspid varchar, 92 regorgdisplay varchar, 93 handle_type varchar , 94 is_new varchar , 95 countrydisplay varchar , 96 statusdisplay varchar , 97 typedisplay varchar, 98 regcapcurdisplay varchar
$ du -sh mv_enterprisebaseinfo_20180812.csv 130G mv_enterprisebaseinfo_20180812.csv
$ head -n 1 mv_enterprisebaseinfo_20180812.csv 310000645D0435BED45B39E0531ECDA8C0463F2018020413595200008546上海佐洽房地产经纪有限公司闵行第一分公司3101120018106392151K7032018-02-02 00:00:00200000150214137361900-01-01 00:00:002018-02-02 00:00:002018-02-02 00:00:0031011210房地产经纪,房地产信息咨询,商务信息咨询,设计、制作、代理、发布各类广告,市场信息咨询与调查(不得从事社调查、社会调研、民意调查、民意测验),物业管理,票务代理,会务服务,翻译服务,企业管理咨询,接受隶属企业委托从事相关业务,从事计算机科技领域内的技术开发、技术转让、技术咨询、技术务,办公用品、日用百货的销售。。【依法须经批准的项目,经相关部门批准后方可开展经营活动000000000001900-01-01 00:00:0002018-08-08 09:54:29.000000,E20180204,E20180205,E20180206,E20180207,E20180212,E20180718,E2015030410e635326b318d6881ca0214c0831e6e65P01MA1GBWMN991310112MA1GBWMN9972E3B618BF6B3365E0531ECDA8C05E49E20150304D9FC431BDDA2CF143DDB0A89F76036F7闵行区市场监督管理局1存续(在营、开业、在册)有限责任公司分公司自然人独资
$ time psql -d stagging -h 192.168.****.**** -p 5432 -U gpadmin -c "COPY xiaoxu.enterprisebaseinfocollect_out_20180814 FROM '/home/xiaoxu/greenplum-load-data/oracle-export-greenplum/mv_enterprisebaseinfo_20180812.csv' WITH csv DELIMITER E'\001' LOG ERRORS SEGMENT REJECT LIMIT 3000 ROWS" COPY 90700000
real 22m13.116s user 0m0.003s sys 0m0.002s
select pg_size_pretty(pg_relation_size('enterprisebaseinfocollect_out_20180812')) 137 GB
select count(*) from enterprisebaseinfocollect_out_20180812; 2,7210,0000
分布键详细信息
select gp_segment_id,count(1) from enterprisebaseinfocollect_out_20180814 group by 1;
1 s_ext_nodenum varchar, 2 pripid varchar, 3 s_ext_sequence varchar, 4 entname varchar, 5 oriregno varchar, 6 regno varchar, 7 enttype varchar, 8 ppripid varchar, 9 pentname varchar, 10 pregno varchar, 11 hypotaxis varchar, 12 industryphy varchar, 13 industryco varchar, 14 abuitem varchar, 15 cbuitem varchar, 16 opfrom varchar, 17 opto varchar, 18 postalcode varchar, 19 tel varchar, 20 email varchar, 21 localadm varchar, 22 credlevel varchar, 23 assdate varchar, 24 esdate varchar, 25 apprdate varchar, 26 regorg varchar, 27 entcat varchar, 28 entstatus varchar, 29 regcap varchar, 30 opscope varchar, 31 opform varchar, 32 opscoandform varchar, 33 ptbusscope varchar, 34 domdistrict varchar, 35 dom varchar, 36 ecotecdevzone varchar, 37 domproright varchar, 38 oplocdistrict varchar, 39 oploc varchar, 40 reccap varchar, 41 insform varchar, 42 parnum varchar, 43 parform varchar, 44 exenum varchar, 45 empnum varchar, 46 sconform varchar, 47 forcapindcode varchar, 48 midpreindcode varchar, 49 protype varchar, 50 congro varchar, 51 congrocur varchar, 52 congrousd varchar, 53 regcapusd varchar, 54 regcapcur varchar, 55 regcaprmb varchar, 56 forregcapcur varchar, 57 forregcapusd varchar, 58 forreccapusd varchar, 59 worcap varchar, 60 chamecdate varchar, 61 opracttype varchar, 62 forentname varchar, 63 depincha varchar, 64 country varchar, 65 itemofoporcpro varchar, 66 conofcontrpro varchar, 67 fordom varchar, 68 forregecap varchar, 69 foropscope varchar, 70 s_ext_entproperty varchar, 71 s_ext_timestamp varchar, 72 s_ext_batch varchar, 73 s_ext_validflag varchar, 74 s_ext_induscat varchar, 75 s_ext_enttype varchar, 76 manacate varchar, 77 limparnum varchar, 78 foreignbodytype varchar, 79 entname_old varchar, 80 person_id varchar, 81 name varchar, 82 certype varchar, 83 ancheyear varchar, 84 candate varchar, 85 revdate varchar, 86 licid varchar, 87 credit_code varchar, 88 entid varchar, 89 jobid varchar, 90 tax_code varchar, 91 zspid varchar, 92 regorgdisplay varchar, 93 handle_type varchar , 94 is_new varchar , 95 countrydisplay varchar , 96 statusdisplay varchar , 97 typedisplay varchar, 98 regcapcurdisplay varchar
$ psql -d **** -h 192.168.*****.**** -p 5432 -U gpadmin -c "copy datafix.xiaoxu_df_entname to '/home/xiaoxu/shujudingzheng/data-dir/entname.csv' WITH DELIMITER AS E'\u0001' " COPY 11807
$ head -n 3 entname.csv 11000020e38b8c4353938c014370906a7e0004中赣核(北京)投资发展有限公司 440000440125125021998031700074许村龙渡电镀厂 440000P-4da509af-be89-4ef8-b24a-4a4e97a96b57海宁市许巷乡郭湾村房屋装潢服务部
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。