awk src: https://wizardzines.com/comics/awk/ Print $ # print all of its input on the standard output $ echo 'foo bar' | awk '{ print }' foo bar $ # $0 is the whole line, so same behavior as previous one $ echo 'foo bar' | awk '{ print $0 }' foo bar $ echo 'foo bar' | awk '{ print $1 }' foo $ echo 'foo bar' | awk '{ print $2 }' bar $ echo 'foo bar' | awk '{ print $1 }' foo bar $ # use commas to join fields with a space $ echo '1 2 3 4 5 6' | awk '{ print $1,$2 }' 1 2 4 5 $ # concat with content $ echo '1 2 3 4 5 6' | awk '{ print $1 "," $2 }' 1,2 4,5 $ # NF == number of fields == index of the last column $ # NF prints the number of fields $ # $NF prints the value of the last field $ echo '1 2 3 4 5 6' | awk '{ print NF, $NF }' 3 3 3 6 $ # NR == number of rows read so far (the current row number) $ echo 'foo bar one two' | awk '{ print NR " " $2 }' 1 bar 2 two $ # print last line $ echo 'first line second line third line' | awk ' { last = $0 } END { print last }' third line $ # -F: flag to set the field separator, default is whitespace $ echo 'foo bar,popo tutu' | awk -F',' '{ print $2 }' popo tutu Text manipulation $ # print the last two columns swapped, separated by a tab $ echo 'id,first name,last name 1,foo,bar 2,Patrick,Dupont 3,Chuck,Noris' | awk -F',' '{ print $NF "\t" $(NF-1) }' last name first name bar foo Dupont Patrick Noris Chuck $ # printf with substr (index starts from 1) $ echo 'somelongstring anotherlongstring' | awk '{ printf "%s\n", substr($1,1,5) }' somel anoth $ # using %-Ns to set the desired column width $ echo 'id,first name,last name 1,foo,bar 2,Patrick,Dupont 3,Chuck,Noris' | awk -F',' '{ printf "%-20s \t %-10s \t %s \n", NR, $2, $3 }' 1 first name last name 2 foo bar 3 Patrick Dupont 4 Chuck Noris $ # AWK BEGIN END $ echo '1,foo 2,bar 3,foobar' | awk -F',' ' BEGIN { print "computing average..." } { total = total + $1 } END { print "average is:", total/NR }' computing average... 
average is: 2 $ # filter and perform mathematical operations $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '$3 > 0 { print $1, $2 * $3 }' Kathy 40 Mark 100 Mary 121 Susie 76.5 $ # fancier output $ # %.2f prints as a number with 2 digits after the decimal point $ # $ is printed verbatim $ # /!\ do not forget the newline \n $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '$3 > 0 { printf("total pay for %s is $%.2f\n", $1, $2 * $3) }' total pay for Kathy is $40.00 total pay for Mark is $100.00 total pay for Mary is $121.00 total pay for Susie is $76.50 $ # another example $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '{ printf("%-8s $%6.2f\n", $1, $2 * $3) }' Beth $ 0.00 Dan $ 0.00 Kathy $ 40.00 Mark $100.00 Mary $121.00 Susie $ 76.50 $ # string concatenation $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' { names = names $1 " " } END { print names }' Beth Dan Kathy Mark Mary Susie Filter $ # regexp $ echo 'foo poo foobar hello' | awk '/bar/ { print $2 }' hello $ # match field $ echo 'foo bar popo titi' | awk '$2 == "titi" { print $1 }' popo $ # match within specific field $ echo 'foobar popo titi tutu' | awk '$1~/ba/ { print $2 }' popo $ # combinations of patterns $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '$2 >= 4 || $3 >= 20 { print }' Beth 4.00 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18 $ # negation $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '!($2 < 4 && $3 < 20) { print }' Beth 4.00 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18 Data validation $ # awk can be used for checking that data has reasonable values and is in the right format $ # ex: check number of fields $ # if there are no errors, there is no output $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 
20 another_field Mary 5.50 22 Susie 4.25 18' | awk 'NF != 3 { print $0, "number of fields is not equal to 3" }' Mark 5.00 20 another_field number of fields is not equal to 3 $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '$2 < 4.00 { print $0, "rate is below minimum wage" }' Dan 3.75 0 rate is below minimum wage Built-in functions $ # `length` to count the number of characters in a string $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk '{ print $1, length($1) }' Beth 4 Dan 3 Kathy 5 Mark 4 Mary 4 Susie 5 $ # counting lines, words and characters $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' { nc = nc + length($0) + 1 nw = nw + NF } END { printf("%d lines, %d words, %d characters\n", NR, nw, nc) }' 6 lines, 18 words, 94 characters Control-flow statements $ # if/else $ echo 'Beth 4.00 0 Dan 3.75 0 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' $2 > 4 { n = n + 1; pay = pay + $2 * $3 } END { if (n > 0) printf("%d employees, total pay is %d, average pay is %.2f\n", n, pay, pay/n) else print "no employees are paid more than $4/hour" }' 3 employees, total pay is 297, average pay is 99.17 $ # while: condition + body $ echo 'Beth 4.00 1 Dan 3.75 2 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' { i = 1 while (i <= 3) { printf("%d\t%.2f\n", i, $2 * (1 + $3) ^ i) i = i + 1 } }' 1 8.00 2 16.00 3 32.00 1 11.25 2 33.75 3 101.25 1 44.00 2 484.00 3 5324.00 1 105.00 2 2205.00 3 46305.00 1 126.50 2 2909.50 3 66918.50 1 80.75 2 1534.25 3 29150.75 $ # for $ echo 'Beth 4.00 1 Dan 3.75 2 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' { for (i = 1; i <= 3; i = i + 1) printf("%d\t%.2f\n", i, $2 * (1 + $3) ^ i) }' 1 8.00 2 16.00 3 32.00 1 11.25 2 33.75 3 101.25 1 44.00 2 484.00 3 5324.00 1 105.00 2 2205.00 3 46305.00 1 126.50 2 2909.50 3 66918.50 1 80.75 2 1534.25 3 29150.75 Arrays $ # arrays for storing 
groups of related values $ # first remember each input line $ # then print lines in reverse order in the END action $ echo 'Beth 4.00 1 Dan 3.75 2 Kathy 4.00 10 Mark 5.00 20 Mary 5.50 22 Susie 4.25 18' | awk ' { line[NR] = $0 } END { i = NR while (i > 0) { print line[i] i = i - 1 } }' Susie 4.25 18 Mary 5.50 22 Mark 5.00 20 Kathy 4.00 10 Dan 3.75 2 Beth 4.00 1 Tips $ # print the total number of input lines $ awk 'END { print NR }' $ # print the tenth input line $ awk 'NR == 10' $ # print the last field of every input line $ awk '{ print $NF }' $ # print the last field of the last input line $ awk ' { field = $NF } END { print field }' $ # print every input line with more than four fields $ awk 'NF > 4' $ # print every input line in which the last field is more than 4 $ awk '$NF > 4' $ # print the total number of fields in all input lines $ awk ' { nf = nf + NF} END { print nf }' $ # print the total number of lines that contain `Beth` $ awk ' /Beth/ { nlines = nlines + 1 } END { print nlines }' $ # print the largest first field and the line that contains it $ awk ' $1 > max { max = $1; maxline = $0 } END { print max, maxline }' $ # print every line longer than 80 characters $ awk 'length($0) > 80' $ # print the number of fields in every line followed by the line itself $ awk '{ print NF, $0 }' $ # exchange the first two fields of every line $ awk '{ temp = $1; $1 = $2; $2 = temp; print }' $ # print every line after erasing the second field $ awk '{ $2 = ""; print }' $ # print every line with line number $ awk '{ print NR, $0 }' $ # print the sums of the fields of every line $ awk ' { sum = 0 for (i = 1; i <= NF; i = i + 1) sum = sum + $i print sum }'