shell脚本是一条执行完再执行下一条的。
for 循环示例(for i in $(seq 10)):
# Run test.sh sequentially 10 times; shell commands in a script execute one
# after another, so each iteration waits for the previous script to finish.
for i in $(seq 10); do
  sh ./test.sh
  # $? holds the exit status of the last command (0 = success).
  if [ $? -eq 0 ]; then
    echo "ok"
  else
    echo "false"   # fixed typo: was "flase"
  fi
  sh ./testt.sh
done
shell产生测试数据
#!/bin/bash
# Generate a large comma-separated test data file (data1.txt), one record per
# line. NOTE: 400,000,000 iterations produce a very large file and take long.
rm -rf ./data1.txt
touch data1.txt
# The field-name separators are loop-invariant; define them once instead of
# re-assigning them on every one of 4e8 iterations.
str1=',name'
str2=',addr'
str3=',phone'
str4=',tel'
str5=',mail_addr'
str6=',bank_card_num'
str7=',unit_num'
str8=',over_due'
str9=',flag'
for ((i = 0; i < 400000000; i++)); do
  name=${i}${str1}${i}${str2}${i}${str3}${i}${str4}${i}${str5}${i}${str6}${i}${str7}${i}${str8}${i}${str9}${i}
  echo "$name" >> data1.txt
done
echo 'success!'
shell连接hive
#!/bin/bash
# Open the hive CLI and feed it statements through a here-document.
# Write HiveQL between "hive<<EOF" and "EOF" exactly as you would type it
# interactively. Use "--" for comments inside the here-doc: a "#" comment
# would be passed to hive verbatim and cause a parse error.
source /etc/profile
#/opt/cloudera/parcels/CDH-5.4.3-1.cdh5.4.3.p0.6/lib/hive<<EOF
hive<<EOF
use testdp;
show tables;
EOF
exit
day=$(date -d '-0 day' '+%Y-%m-%d')
echo "$day"
day1=$(date -d '-0 day' '+%Y')
echo "**$day1"
shell连hive可以得到hive执行语句返回的结果
#!/bin/bash
# Run a HiveQL statement non-interactively with -e and capture its exit
# status, so the caller can tell whether the query succeeded.
hive -e 'SHOW DATABASES'
d=$?
echo "$d"
if [ "$d" -eq 0 ]; then
  echo "echo"
fi
查询hadoop中hive表的大小(结果单位为 byte,除以 1024 得 KB):hadoop fs -dus /user/hive/warehouse/raw_datas.db/*20151009 | awk '{sum+=$1};END{print sum}'
查询某文件中包括某个字段的行数:
grep -r ugi=hue hadoop-cmf-hive-HIVEMETASTORE-slave4.log.out|grep 2015-10-16|wc -l
shell产生一定大小的测试数据
#!/bin/sh
# Grow a file by appending lines until it reaches 10 GiB (10737418240 bytes).
a="$(date +%s)"
echo "开始时间:$a"
# Fixed: the original tested /root/test111 but wrote /root/test11 -- use one path.
target=/root/test11
if [ -e "$target" ]
then
  while true
  do
    # File size in bytes (column 5 of ls -l); ls does not read the file body,
    # so this stays cheap even when the file is large.
    size=`ls -l "$target" | awk '{print $5}'`
    # echo "$size"
    if [ "$size" -lt 10737418240 ]
    then
      echo "Hello wolrd!" >>"$target"
    else
      echo "完成"
      break
    fi
    # Fixed: the original backgrounded every iteration with "&", forking
    # without bound and saturating the CPU; run each step in the foreground.
  done
fi
b="$(date +%s)"
echo "结束时间:$b"
#let d=$b-$a
#echo "时间:$d"
exit 0
模拟多个线程:
# Simulate concurrent clients: launch 100 hdfs uploads in the background,
# then block on "wait" until every job has finished before reporting.
for i in $(seq 100); do
  {
    hadoop fs -put ./test /test
    if [ $? -eq 0 ]; then
      echo "ok"
    else
      echo "false"   # fixed typo: was "flase"
    fi
  } &
done
wait
echo "全部上传完毕!"
exit 0
删除后台一直在运行的shell(删掉之后就不能再重启了):ps -ef | grep test.sh 或 ps aux | grep test.sh
shell之while日期
# Time a simple counting loop: record epoch seconds before and after,
# then print the elapsed time.
a="$(date +%s)"
echo "$a"
n=0
while [ "$n" -lt 1000 ]
do
  # Arithmetic expansion avoids forking an `expr` process on every iteration.
  n=$((n + 1))
  echo "$n"
done
b="$(date +%s)"
echo "$b"
c=$((b - a))
# Fixed: stray text "ongoing" was fused onto the end of this line.
echo "$c"
shell中拼接字符串:
#!/bin/sh
# Build HDFS directory names by string concatenation: creates /test1 .. /test10.
for idx in $(seq 10); do
  hadoop fs -mkdir "/test${idx}"
done
exit 0
#!/bin/sh
# Upload ./test to HDFS 100 times in parallel, one target path per counter.
for i in $(seq 100); do
  {
    hadoop fs -put ./test /"test""$i"
    echo "$i"
  } &
done
# Fixed: wait for the background uploads to finish before exiting; the
# original exited immediately while transfers were still in flight.
wait
exit 0
比较两个数不相等 特别是在判断上一个程序的执行状况时,要用不等于0,因为非正常退出是不等于0。
#!/bin/bash
# Demonstrate numeric inequality with -ne. When checking a previous command's
# status, test "not equal to 0": any abnormal exit yields a non-zero status.
a=0
if [ 1 -ne 0 ]; then
  echo 's'
fi
查找某个字段: grep -i "2015-11-30 17:36:05.343" ./000000_0
对比两个文件,将相同的部分重定向到另外一个文件
#!/bin/bash
# For every line of file $1, append the lines of file $2 that match it
# (as a grep pattern) to file $3.
# Fixed: the original defined sourcefile/outPath but then read $2/$3
# directly inside the loop; expansions are now quoted and `read -r`
# preserves backslashes in the patterns.
file=$1
sourcefile=$2
outPath=$3
compare_files() {
  local line
  while IFS= read -r line
  do
    echo "$line"
    grep -- "$line" "$sourcefile" >>"$outPath"
  done < "$file"
}
compare_files
每天跑批从hdfs上将数据重定向到一文件
#!/bin/bash
# Daily batch job: fetch the hive table directory `blacklist` from HDFS,
# rename each part file to blacklist_tmp<i>, and collect every file's
# contents into one combined blacklist_tmp file.
source /etc/profile
# local working directory
path1=/home/datacenter/txanyihua/
# directory the table is fetched into
path=/home/datacenter/txanyihua/blacklist/
rm -rf "$path1"/blacklist*
hadoop fs -get /user/hive/warehouse/appq.db/blacklist "$path1"
# Fixed: check the status of the hadoop get itself; the original tested $?
# after the for-loop, where it is always 0.
if [ $? -ne 0 ]; then
  exit 1
fi
i=0
for file2 in `ls -a $path`
do
  if [ x"$file2" != x"." -a x"$file2" != x".." ]; then
    if [ -f "$path$file2" ]; then
      let i=$i+1
      echo "$path$file2"
      # Fixed: append the content BEFORE moving the file away -- the
      # original ran cat after mv, so the source path no longer existed.
      cat "$path$file2" >> "$path1"/blacklist_tmp
      mv "$path$file2" "$path1"/blacklist_tmp$i
    fi
  fi
done
时间操作:
#!/bin/bash
# Print a YYYYMMDD date stamp: the argument date when one is given,
# otherwise yesterday's date.
if [ $# -eq 1 ]; then
  # Quote "$1" so an argument containing spaces (e.g. "2016-01-01 12:00")
  # is passed to date as a single word.
  n_date=$(date -d "$1" +"%Y%m%d")
  echo "$n_date +o"
else
  n_date=$(date -d yesterday +"%Y%m%d")
  echo "$n_date +o0"
fi
遍历文件
# Walk the entries of the current directory and report whether each one is
# a regular file or a directory.
for file in ./*
do
  # Quote "$file" so names containing spaces or glob characters are handled.
  if test -f "$file"
  then
    echo "$file" 是文件
  else
    echo "$file" 是目录
  fi
done
是否包含某个字符串
# When $file matches the regex held in $strA, list the table's
# chain_status=active subdirectory on HDFS.
if [[ $file =~ $strA ]]; then
  echo $(hadoop fs -ls "$file"/chain_status=active)
fi
将数据上传至hdfs
# Clone one part file under many sequential names and push each copy into the
# HDFS partition, throttling with a short sleep between background uploads.
for n in {19978..432000}; do
  cp "date=2016-11/part" "date=2016-11/part-00000$n"
  echo "$n"
  hadoop fs -put "date=2016-11/part-00000$n" /user/hive/warehouse/scan_details/ecodeid=xmht01/date=2016-11 &
  # rm -rf date\=2016-11/part-00000*
  sleep 2s
done
获取当前目录
# Resolve the absolute path of the directory one level above this script,
# then launch the worker in the background, detached from the terminal.
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"
# Fixed: ">&1" after the append redirect was a no-op (and the trailing
# "&(>/dev/null 2>&1 &" fragment was a mangled paste); "2>&1" sends stderr
# to the same log file as stdout.
nohup sh "$FWDIR"/bin/scanSpark.sh >>"$FWDIR"/log/scanSpark.log 2>&1 &
history显示时间
# Make `history` show a timestamp (%F %T) and the current user for each entry.
export HISTTIMEFORMAT="%F %T `whoami` "
某条命令的执行时间及耗时
#!/bin/bash
# Measure how long a command takes: capture epoch seconds before and after,
# print the elapsed time, and append a timing record to ./sqoop.log.
# Fixed: the bare "/usr/bin" header line would have been executed as a
# command (a directory) and failed; it is now a proper shebang.
export HADOOP_USER_NAME=hdfs
startTime=$(date +%s)
sleep 2
endTime=$(date +%s)
time=$((endTime - startTime))
echo "$time"
echo "$(date +%Y-%m-%d%t%H:%M:%S) ----> tbl_tc_tmpfileassign --->$time s" >>./sqoop.log
echo over
远程ssh执行命令并退出
# Run commands on each remote host over ssh via a here-document, then exit
# the remote shell.
for kylinIp in cdh02 cdh03 cdh04
do
  echo "$kylinIp"
  # Fixed: quote the here-doc delimiter so the backtick command substitution
  # is executed on the REMOTE host; with an unquoted delimiter the local
  # shell expanded it before ssh ever ran.
  ssh $kylinIp > /dev/null 2>&1 << 'eeooff'
touch abcdefg.txt
echo `/sbin/ifconfig|sed -n '/inet addr/s/^[^:]*:\([0-9.]\{7,15\}\) .*/\1/p'|awk 'NR==1{print}'`
exit
eeooff
  echo done!
  # echo `/sbin/ifconfig|sed -n '/inet addr/s/^[^:]*:\([0-9.]\{7,15\}\) .*/\1/p'|awk 'NR==1{print}'`
done
后台启动
# Launch the Spark job detached from the terminal: nohup + trailing & keep it
# running after logout; the --jars list is built by joining sparklib/*.jar
# with commas; all stdout/stderr output is discarded.
nohup spark2-submit --master yarn-client --jars $(echo sparklib/*.jar | tr ' ' ',') --class com.ishansong.bigdata.Infos ./dynamic-premium-1.0-SNAPSHOT.jar > /dev/null 2>&1 &