awk是一门解释型的文本处理语言,它在文本处理领域中非常强大和方便。awk主要有三种类型:AWK(最初的版本)、NAWK(New AWK,改进版)和GAWK(GNU AWK)。
它一般和sed一起处理文件日志等等,我们先看一下它的工作流是如何工作的:
由图看出,awk主要由这下面几部分组成:
awk程序结构主要由BEGIN、BODY和END这三部分组成。
为了顺利的学习awk各种淫技需要准备如下数据集,分别为stat.txt和score.txt。写入如下数据:
netstat -a >> stat.txt
cat stat.txt
Proto Recv-Q Send-Q Local-Address Foreign-Address State
tcp4 0 0 192.168.1.100.56710 223.6.251.45.http LAST_ACK
tcp4 0 0 192.168.1.100.56709 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56708 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56707 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56706 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56705 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56704 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56703 111.202.114.77.https ESTABLISHED
tcp4 0 0 192.168.1.100.56702 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56701 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56694 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56690 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56686 180.76.22.33.https ESTABLISHED
tcp4 0 0 192.168.1.100.56682 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56681 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56678 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56676 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56675 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56674 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56673 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56672 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56671 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56654 124.193.165.230.https ESTABLISHED
tcp4 0 0 192.168.1.100.56652 ti-in-f100.1e100.https SYN_SENT
tcp4 0 0 192.168.1.100.56650 123.58.182.251.https ESTABLISHED
tcp4 0 0 192.168.1.100.56649 17.188.132.72.5223 ESTABLISHED
tcp4 0 0 192.168.1.100.56643 17.252.236.158.5223 ESTABLISHED
tcp4 0 0 192.168.1.100.56620 melpa-3.milkbox..https ESTABLISHED
tcp4 0 37 192.168.1.100.56605 melpa-stable-2.m.https LAST_ACK
tcp4 0 263 192.168.1.100.56593 melpa-3.milkbox..https FIN_WAIT_1
tcp4 0 0 localhost.d-data *.* LISTEN
tcp4 0 0 localhost.corelccam *.* LISTEN
tcp4 0 0 192.168.1.100.56348 121.51.36.139.https ESTABLISHED
tcp4 0 0 192.168.1.100.54803 melpa-stable-2.m.https ESTABLISHED
再填充score.txt,如下:
nano score.txt
cat score.txt
Marry 2143 78 84 77
Jack 2321 66 78 45
Tom 2122 48 77 71
Mike 2537 87 97 95
Bob 2415 40 57 62
1. 打印所有数据
**awk按照行进行输出和处理**
awk "{print}" stat.txt
Proto Recv-Q Send-Q Local-Address Foreign-Address State
tcp4 0 0 192.168.1.100.56710 223.6.251.45.http LAST_ACK
tcp4 0 0 192.168.1.100.56709 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56708 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56707 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56706 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56705 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56704 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56703 111.202.114.77.https ESTABLISHED
tcp4 0 0 192.168.1.100.56702 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56701 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56694 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56690 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56686 180.76.22.33.https ESTABLISHED
tcp4 0 0 192.168.1.100.56682 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56681 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56678 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56676 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56675 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56674 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56673 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56672 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56671 93-46-8-89.ip105.https SYN_SENT
tcp4 0 0 192.168.1.100.56654 124.193.165.230.https ESTABLISHED
tcp4 0 0 192.168.1.100.56652 ti-in-f100.1e100.https SYN_SENT
tcp4 0 0 192.168.1.100.56650 123.58.182.251.https ESTABLISHED
tcp4 0 0 192.168.1.100.56649 17.188.132.72.5223 ESTABLISHED
tcp4 0 0 192.168.1.100.56643 17.252.236.158.5223 ESTABLISHED
tcp4 0 0 192.168.1.100.56620 melpa-3.milkbox..https ESTABLISHED
tcp4 0 37 192.168.1.100.56605 melpa-stable-2.m.https LAST_ACK
tcp4 0 263 192.168.1.100.56593 melpa-3.milkbox..https FIN_WAIT_1
tcp4 0 0 localhost.d-data *.* LISTEN
tcp4 0 0 localhost.corelccam *.* LISTEN
tcp4 0 0 192.168.1.100.56348 121.51.36.139.https ESTABLISHED
tcp4 0 0 192.168.1.100.54803 melpa-stable-2.m.https ESTABLISHED
2.打印第一列和第三列数据
**awk采用$1,$2,$3...$6...代表需要打印的列(字段)**
awk '{print $1,$3}' stat.txt
Proto Send-Q
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
tcp4 0
.....
.....
3.格式化输出
**awk采用printf对需要处理的数据进行格式化打印,printf的控制和c语言的占位符控制基本上是兼容的。**
#printf和c语言差别不是很大,还可以对齐。
awk '{printf "%-8s %-8s %-8s %-18s %-22s %-15s\n",$1,$2,$3,$4,$5,$6}' stat.txt
Proto Recv-Q Send-Q Local-Address Foreign-Address State
tcp4 0 0 192.168.1.100.56710 223.6.251.45.http LAST_ACK
tcp4 0 0 192.168.1.100.56709 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56708 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56707 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56706 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56705 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56704 223.6.248.220.http LAST_ACK
tcp4 0 0 192.168.1.100.56703 111.202.114.77.https ESTABLISHED
tcp4 0 0 192.168.1.100.56702 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56701 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56694 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56690 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56686 180.76.22.33.https ESTABLISHED
tcp4 0 0 192.168.1.100.56682 119.75.217.109.https ESTABLISHED
tcp4 0 0 192.168.1.100.56681 119.75.217.109.https ESTABLISHED
......
.
.
.
.
.
.
4.过滤记录
**awk采用== 、!=, >, <, >=, <=、||和&&等常用的运算表达式**
#模式筛选第三列为0,第六列等于“LISTEN”的数据行。
awk '$3==0 && $6=="LISTEN" ' stat.txt
tcp4 0 0 localhost.d-data *.* LISTEN
tcp4 0 0 localhost.corelccam *.* LISTEN
#如果想引入表头,可以通过awk的内置参数**NR**来实现。
awk '$3==0 && $6=="LISTEN" || NR==1 ' stat.txt
Proto Recv-Q Send-Q Local-Address Foreign-Address State
tcp4 0 0 localhost.d-data *.* LISTEN
tcp4 0 0 localhost.corelccam *.* LISTEN
#如果还想格式化输出,可以结合前面的**printf**来实现。
awk '$3==0 && $6=="LISTEN" || NR==1 {printf "%-8s %-8s %-8s %-18s %-22s %-15s\n",$1,$2,$3,$4,$5,$6}' stat.txt
5.内建变量
6.指定分隔符
**awk通过内建变量FS来指定分隔符**
awk 'BEGIN{FS=":"} {print $1,$3,$6}' /etc/passwd
或者awk -F: '{print $1,$3,$6}' /etc/passwd
_postgres 216 /var/empty
_krbtgt 217 /var/empty
在深入讲解awk神技之前,先说一下awk是通过-f来指定执行awk源文件。awk -f source.awk stat.txt
1. -v 变量赋值选项
awk -v name=Brian 'BEGIN{printf "Name = %s\n", name}'
Name = Brian
2. --dump-variables[=file] 选项
#该选项会输出排好序的全局变量列表和它们最终的值到文件中,默认的文件是 awkvars.out.
$ awk --dump-variables ''
$ cat awkvars.out
ARGC: 1
ARGIND: 0
ARGV: array, 1 elements
BINMODE: 0
CONVFMT: "%.6g"
ERRNO: ""
FIELDWIDTHS: ""
FILENAME: ""
FNR: 0
FPAT: "[^[:space:]]+"
FS: " "
IGNORECASE: 0
LINT: 0
NF: 0
NR: 0
OFMT: "%.6g"
OFS: " "
ORS: "\n"
RLENGTH: 0
RS: "\n"
RSTART: 0
RT: ""
SUBSEP: "\034"
TEXTDOMAIN: "messages"
3. --lint[=fatal] 选项
#该选项允许检查程序的不兼容性或者模棱两可的代码,当提供参数 fatal的时候,它会把Warning消息当作Error处理。
$ awk --lint '' /bin/ls
awk: cmd. line:1: warning: empty program text on command line
awk: cmd. line:1: warning: source file does not end in newline
awk: warning: no program text at all!
4. --posix 选项 该选项开启严格的POSIX兼容。
5. --profile[=file] 选项
#该选项会输出一份格式化之后的程序到文件中,默认文件是 awkprof.out。
$ awk --profile 'BEGIN{printf"---|Header|--\n"} {print}
END{printf"---|Footer|---\n"}' score.txt > /dev/null
$ cat awkprof.out
BEGIN {
printf "---|Header|--\n"
}
{
print $0
}
END {
printf "---|Footer|---\n"
}
如果我们想查找一行中含有FIN的数据,怎么过滤?
awk '$6 ~ /FIN/ || NR==1 {print NR,$4,$5,$6}' OFS="\t" stat.txt
#反例
awk '$6 !~ /WAIT/ || NR==1 {print NR,$4,$5,$6}' OFS="\t" stat.txt
在awk中,用~表示匹配后面的模式,用!~表示不匹配后面的模式;模式写在一对/之间,即/模式/。
$ echo -e "cat\nbat\nfun\nfin\nfan" | awk '/f.n/'
fun
fin
fan
$ echo -e "This\nThat\nThere\nTheir\nthese" | awk '/^The/'
There
Their
$ echo -e "knife\nknow\nfun\nfin\nfan\nnine" | awk '/n$/'
fun
fin
fan
$ echo -e "Call\nTall\nBall" | awk '/[CT]all/'
Call
Tall
$ echo -e "Call\nTall\nBall" | awk '/[^CT]all/'
Ball
$ echo -e "Call\nTall\nBall\nSmall\nShall" | awk '/Call|Ball/'
Call
Ball
$ echo -e "Colour\nColor" | awk '/Colou?r/'
Colour
Color
$ echo -e "ca\ncat\ncatt" | awk '/cat*/'
ca
cat
catt
$ echo -e "111\n22\n123\n234\n456\n222" | awk '/2+/'
22
123
234
222
$ echo -e "Apple Juice\nApple Pie\nApple Tart\nApple Cake" | awk '/Apple (Juice|Cake)/'
Apple Juice
Apple Cake
AWK支持关联数组,也就是说,不仅可以使用数字索引的数组,还可以使用字符串作为索引,而且数字索引也不要求是连续的。数组不需要声明可以直接使用,语法如下: array_name[index] = value。删除数组元素使用delete语句,语法如下: delete array_name[index]
# ex1
awk 'BEGIN {
fruits["mango"] = "yellow";
fruits["orange"] = "orange"
print fruits["orange"] "\n" fruits["mango"]
delete fruits["orange"];
}'
orange
yellow
#ex2
awk 'BEGIN {
array["0,0"] = 100;
array["0,1"] = 200;
array["0,2"] = 300;
array["1,0"] = 400;
array["1,1"] = 500;
array["1,2"] = 600;
# print array elements
print "array[0,0] = " array["0,0"];
print "array[0,1] = " array["0,1"];
print "array[0,2] = " array["0,2"];
print "array[1,0] = " array["1,0"];
print "array[1,1] = " array["1,1"];
print "array[1,2] = " array["1,2"];
}'
array[0,0] = 100
array[0,1] = 200
array[0,2] = 300
array[1,0] = 400
array[1,1] = 500
array[1,2] = 600
awk支持算术运算符+ - * / %,自增自减运算符++a/--a、a++/a--,三元运算符(a>b)?a:b,一元运算符a=-10……
if (condition)
action
if (condition) {
action-1
action-2
.
.
action-n
}
if (condition)
action-1
else if (condition2)
action-2
else
action-3
awk内建函数很多,需要的可以通过手册查找。执行shell命令是通过system函数来实现的。用户自定义函数的基本语法是:
function function_name(argument1, argument2, ...) {
function body
}
# Return the smaller of num1 and num2.
# When neither is strictly smaller, num2 is returned.
function find_min(num1, num2) {
    return (num1 < num2) ? num1 : num2
}
# Return the larger of num1 and num2.
# When neither is strictly larger, num2 is returned.
function find_max(num1, num2) {
    return (num1 > num2) ? num1 : num2
}
# Main function: print the minimum and the maximum of two numbers.
#   num1, num2 - the two numbers to compare
#   result     - local scratch variable; awk has no local declarations, so
#                it is listed as an extra parameter that callers do not pass
function main(num1, num2,    result) {
    # Find minimum number (use the arguments rather than fixed constants)
    result = find_min(num1, num2)
    print "Minimum =", result
    # Find maximum number
    result = find_max(num1, num2)
    print "Maximum =", result
}
# Entry point: the BEGIN rule runs once, before any input is read.
BEGIN { main(10, 20) }
### result
Minimum = 10
Maximum = 20
#按连接数查看客户端IP
netstat -ntu | awk '{print $5}' | cut -d: -f1 | sort | uniq -c | sort -nr
#打印99乘法表
seq 9 | sed 'H;g' | awk -v RS='' '{for(i=1;i<=NF;i++)printf("%dx%d=%d%s", i, NR, i*NR, i==NR?"\n":"\t")}'