
Traversing HDFS Paths in Shell to Report Directory Sizes Level by Level

Original article by 857技术社区, published 2024-01-10 10:23:25, collected in the column 857-Bigdata.
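The script below walks an HDFS namespace and writes a size report to hdfs_detail.txt in the script's own directory. It first prints a summary of every top-level directory under /, sorted largest first, and then drills down level by level: top-level directories larger than 1 TB (excluding /spark2-history) are expanded, and their children larger than 80 GB are expanded once more. Every entry is printed with a human-readable, color-coded size (KB/MB/GB/TB). The thresholds and the excluded path are cluster-specific and can be adjusted as needed.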
Code language: shell
#!/bin/bash 
 
# Resolve the directory this script lives in and a timestamp for the report header
workdir=$(cd "$(dirname "$0")"; pwd)
date=$(date +%Y-%m-%d-%H:%M:%S)
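
# The same awk formatter was originally repeated on every "hadoop fs -du" call;
# it is factored out here so the pipelines below stay readable. It converts the
# byte count in column 1 to KB/MB/GB/TB and color-codes it (cyan=MB, magenta=GB,
# red=TB). Note: this assumes the two-column du output "<bytes> <path>"; newer
# Hadoop releases add a replicated-size column, in which case $2 must become $3.
format_size(){
    sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2);}}}}'
}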
 
# Recreate the report file and stamp it with the run time
init(){
    rm -f "$workdir/hdfs_detail.txt"
    touch "$workdir/hdfs_detail.txt"
    chmod 777 "$workdir/hdfs_detail.txt"
    echo "[Init Time]:$date" >> "$workdir/hdfs_detail.txt"
    echo "--" >> "$workdir/hdfs_detail.txt"
    echo "--" >> "$workdir/hdfs_detail.txt"
}
# Summary: size of every top-level directory under /, largest first
hdfs_collect(){
    echo "                                        ----[ Summary ]----                                                " >> "$workdir/hdfs_detail.txt"
    echo "" >> "$workdir/hdfs_detail.txt"
    echo "|   Total Size   |   Path   |" >> "$workdir/hdfs_detail.txt"
    hadoop fs -du / | format_size >> "$workdir/hdfs_detail.txt"
    echo "" >> "$workdir/hdfs_detail.txt"
    echo "" >> "$workdir/hdfs_detail.txt"
}
 
# Detail: drill into the largest directories level by level
hdfs_detail(){
    echo "                                       ----[ Detail ]----                                                " >> "$workdir/hdfs_detail.txt"
    echo "" >> "$workdir/hdfs_detail.txt"

    # First level: directories under / larger than 1 TB (excluding /spark2-history)
    hadoop fs -du / | awk '{if($1 > 1099511627776 && $2 != "/spark2-history"){print $2}}' > "$workdir/hdfsfirst.txt"

    # List the children of every flagged first-level directory
    for first in $(cat "$workdir/hdfsfirst.txt");
    do
        hadoop fs -du "$first" | format_size >> "$workdir/hdfs_detail.txt"
    done

    # Second level: collect children larger than 80 GB
    : > "$workdir/hdfssecond.txt"
    for second in $(cat "$workdir/hdfsfirst.txt");
    do
        hadoop fs -du "$second" | awk '{if($1 > 85899345920){print $2}}' >> "$workdir/hdfssecond.txt"
    done

    # Third level: list the children of every flagged second-level directory
    for third in $(cat "$workdir/hdfssecond.txt");
    do
        hadoop fs -du "$third" | format_size >> "$workdir/hdfs_detail.txt"
    done
# Earlier, disabled variant kept for reference; quoting the here-doc delimiter
# keeps the shell from expanding anything inside the block.
:<<'!'
   for line in $hdfs1;
   do
            hadoop fs -du $line |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
        for line1 in $hdfs2;
        do
          hadoop fs -du $line1 |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line1'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
           for line2 in $hdfs3;
            do
                hadoop fs -du $line2  |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line2'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
            done
        done
       echo "" >> $workdir/hdfs_detail.txt     
   done
!
    # Clean up the temporary directory lists
    rm -f "$workdir/hdfsfirst.txt"
    rm -f "$workdir/hdfssecond.txt"
}
init
hdfs_collect
hdfs_detail
echo "SUCCESS"

Original content statement: This article was published on the Tencent Cloud Developer Community with the author's authorization and may not be reproduced without permission.

If there is any infringement, please contact cloudcommunity@tencent.com to have it removed.
