awk

awk

Print

$ # print all of its input on the standard output
$ echo 'foo bar' | awk '{ print }'
foo bar
 
$ # $0 is the whole line, so same behavior as previous one
$ echo 'foo bar' | awk '{ print $0 }'
foo bar
 
$ echo 'foo bar' | awk '{ print $1 }'
foo
 
$ echo 'foo bar' | awk '{ print $2 }'
bar
 
$ echo 'foo
bar' | awk '{ print $1 }'
foo
bar
 
$ # items separated by commas are printed joined by the output field separator (a space by default)
$ echo '1 2 3
4 5 6' | awk '{ print $1,$2 }'
1 2
4 5
 
$ # concat with content
$ echo '1 2 3
4 5 6' | awk '{ print $1 "," $2 }'
1,2
4,5
 
$ # NF == number of fields == last column
$ # NF prints the number of fields
$ # $NF prints the value of the last field
$ echo '1 2 3
4 5 6' | awk '{ print NF, $NF }'
3 3
3 6
 
$ # NR == number of records (lines) read so far, i.e. the current line number
$ echo 'foo bar
one two' | awk '{ print NR " " $2 }'
1 bar
2 two
 
$ # print last line
$ echo 'first line
second line
third line' | awk '
    { last = $0 }
END { print last }'
third line
 
$ # -F: flag to set the field separator (default: runs of spaces and tabs)
$ echo 'foo bar,popo tutu' | awk -F',' '{ print $2 }'
popo tutu
 

Text manipulation

$ # print the last two comma-separated fields in swapped order, joined by a tab
$ echo 'id,first name,last name
1,foo,bar
2,Patrick,Dupont
3,Chuck,Noris' | awk -F',' '{ print $NF "\t" $(NF-1) }'
last name       first name
bar     foo
Dupont  Patrick
Noris   Chuck
 
$ # printf with substr (index starts from 1)
$ echo 'somelongstring
anotherlongstring' | awk '{ printf "%s\n", substr($1,1,5) }'
somel
anoth
 
$ # using %-Ns to set the desired column width (left-aligned, N characters wide)
$ echo 'id,first name,last name
1,foo,bar
2,Patrick,Dupont
3,Chuck,Noris' | awk -F',' '{ printf "%-20s \t %-10s \t %s \n", NR, $2, $3 }'
1                        first name      last name
2                        foo             bar
3                        Patrick         Dupont
4                        Chuck           Noris
 
$ # AWK BEGIN END
$ echo '1,foo
2,bar
3,foobar' | awk -F',' '
BEGIN { print "computing average..." }
      { total = total + $1 }
END   { print "average is:", total/NR }'
computing average...
average is: 2
 
$ # filter and perform mathematical operations
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '$3 > 0 { print $1, $2 * $3 }'
Kathy 40
Mark 100
Mary 121
Susie 76.5
 
$ # fancier output
$ # %.2f prints as a number with 2 digits after the decimal point
$ # a literal $ in the format string is printed verbatim
$ # /!\ do not forget the newline \n
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '$3 > 0 { printf("total pay for %s is $%.2f\n", $1, $2 * $3) }'
total pay for Kathy is $40.00
total pay for Mark is $100.00
total pay for Mary is $121.00
total pay for Susie is $76.50
 
$ # another example
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '{ printf("%-8s $%6.2f\n", $1, $2 * $3) }'
Beth     $  0.00
Dan      $  0.00
Kathy    $ 40.00
Mark     $100.00
Mary     $121.00
Susie    $ 76.50
 
$ # string concatenation
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
    { names = names $1 " " }
END { print names }'
Beth Dan Kathy Mark Mary Susie

Filter

$ # regexp
$ echo 'foo poo
foobar hello' | awk '/bar/ { print $2 }'
hello
 
$ # match field
$ echo 'foo bar
popo titi' | awk '$2 == "titi" { print $1 }'
popo
 
$ # match within specific field
$ echo 'foobar popo
titi tutu' | awk '$1~/ba/ { print $2 }'
popo
 
$ # combinations of patterns
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '$2 >= 4 || $3 >= 20 { print }'
Beth   4.00  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18
 
$ # negation
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '!($2 < 4 && $3 < 20) { print }'
Beth   4.00  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18

Data validation

$ # awk can be used for checking that data has reasonable values and is in the right format
$ # ex: check number of fields
$ # if there are no errors, there is no output
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20  another_field
Mary   5.50  22
Susie  4.25  18' | awk 'NF != 3 { print $0, "number of fields is not equal to 3" }'
Mark   5.00  20  another_field number of fields is not equal to 3
 
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '$2 < 4.00 { print $0, "rate is below minimum wage" }'
Dan    3.75  0 rate is below minimum wage

Built-in functions

$ # `length` to count the number of characters in a string
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '{ print $1, length($1) }'
Beth 4
Dan 3
Kathy 5
Mark 4
Mary 4
Susie 5
 
$ # counting lines, words and characters
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
    {
      nc = nc + length($0) + 1
      nw = nw + NF
    }
END { printf("%d lines, %d words, %d characters\n", NR, nw, nc) }'
6 lines, 18 words, 94 characters

Control-flow statements

$ # if/else
$ echo 'Beth   4.00  0
Dan    3.75  0
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
$2 > 4 { n = n + 1; pay = pay + $2 * $3 }
END    {
         if (n > 0)
           printf("%d employees, total pay is %d, average pay is %.2f\n", n, pay, pay/n)
         else
           print "no employees are paid more than $6/hour"
       }'
3 employees, total pay is 297, average pay is 99.17
 
$ # while: condition + body
$ echo 'Beth   4.00  1
Dan    3.75  2
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
{
  i = 1
  while (i <= 3) {
    printf("%d\t%.2f\n", i, $2 * (1 + $3) ^ i)
    i = i + 1
  }
}'
1       8.00
2       16.00
3       32.00
1       11.25
2       33.75
3       101.25
1       44.00
2       484.00
3       5324.00
1       105.00
2       2205.00
3       46305.00
1       126.50
2       2909.50
3       66918.50
1       80.75
2       1534.25
3       29150.75
 
$ # for
$ echo 'Beth   4.00  1
Dan    3.75  2
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
{
  for (i = 1; i <= 3; i = i + 1)
    printf("%d\t%.2f\n", i, $2 * (1 + $3) ^ i)
}'
1       8.00
2       16.00
3       32.00
1       11.25
2       33.75
3       101.25
1       44.00
2       484.00
3       5324.00
1       105.00
2       2205.00
3       46305.00
1       126.50
2       2909.50
3       66918.50
1       80.75
2       1534.25
3       29150.75

Arrays

$ # arrays for storing groups of related values
$ # first remember each input line
$ # then print lines in reverse order in the END action
$ echo 'Beth   4.00  1
Dan    3.75  2
Kathy  4.00  10
Mark   5.00  20
Mary   5.50  22
Susie  4.25  18' | awk '
    { line[NR] = $0 }
END {
      i = NR
      while (i > 0) {
        print line[i]
        i = i - 1
      }
    }'
Susie  4.25  18
Mary   5.50  22
Mark   5.00  20
Kathy  4.00  10
Dan    3.75  2
Beth   4.00  1
 

Tips

$ # print the total number of input lines
$ awk 'END { print NR }'
 
$ # print the tenth input line
$ awk 'NR == 10'
 
$ # print the last field of every input line
$ awk '{ print $NF }'
 
$ # print the last field of the last input line
$ awk '
    { field = $NF }
END { print field }'
 
$ # print every input line with more than four fields
$ awk 'NF > 4'
 
$ # print every input line in which the last field is more than 4
$ awk '$NF > 4'
 
$ # print the total number of fields in all input lines
$ awk '
    { nf = nf + NF}
END { print nf }'
 
$ # print the total number of lines that contain `Beth`
$ awk '
/Beth/ { nlines = nlines + 1 }
END    { print nlines }'
 
$ # print the largest first field and the line that contains it
$ awk '
$1 > max { max = $1; maxline = $0 }
END      { print max, maxline }'
 
$ # print every line longer than 80 characters
$ awk 'length($0) > 80'
 
$ # print the number of fields in every line followed by the line itself
$ awk '{ print NF, $0 }'
 
$ # exchange the first two fields of every line
$ awk '{ temp = $1; $1 = $2; $2 = temp; print }'
 
$ # print every line after erasing the second field
$ awk '{ $2 = ""; print }'
 
$ # print every line with line number
$ awk '{ print NR, $0 }'
 
$ # print the sums of the fields of every line
$ awk '
{
  sum = 0
  for (i = 1; i <= NF; i = i + 1)
    sum = sum + $i
  print sum
}'