Monday, September 29, 2014

Grep commands example

#######################################################################
# This file contains the grep demo commands we did in the pattern
# matching lecture.
#######################################################################




#######################################################################
# The input data files all consist of a number of lines, each of which
# has the following format.
#######################################################################
mschreib:02 :Schreiber:Marc H.:10/11/10/60:25.3333:1:1:1:1




#######################################################################
# simplest uses.
#######################################################################

# Print every line of "rank.txt" that contains the string "David".
grep 'David' rank.txt

# Search a number of files at once for the string "Rebecca".
# When several files are searched, grep prefixes each matching line
# with the name of the file it was found in.
grep 'Rebecca' ??.txt




#######################################################################
# Interaction with other commands
#######################################################################

# Picks the "total" summary line out of the output of the "wc" command.
wc ??.txt | grep total

# Midterm scores start with a slash ("/") in my file.
# Count the number of lines that include "/5"; "wc -l" prints only the
# line count.  This rough pattern is sufficient for finding the number
# of people who scored in the 50s for my purpose, even though it would
# also match a single-digit score of 5.
grep /5 rank.txt | wc -l

# Count the number of people who scored a perfect 60.
# The rough pattern "/6" matches any score that starts with a "6".
grep /6 rank.txt | wc -l




#######################################################################
# Regular expression special characters
#######################################################################

# Shows what the character "^" does.
# The first command matches an "s" anywhere in the line.  "^s" looks
# for a line that starts with a login name whose first character is
# "s".  The pattern is quoted so that the shell hands "^" to grep
# untouched (some shells give "^" a special meaning of their own).
grep s rank.txt
grep '^s' rank.txt

# Shows what the character "$" does.
# The first command matches a "1" anywhere in the line.  ":1$" looks
# for a line that ends with a ranking of ":1".  The single quotes keep
# the shell from treating "$" as the start of an expansion.
grep 1 rank.txt
grep ':1$' rank.txt

# Shows what the character "." does.
# "." matches any single character, so "Cr.nin" accepts any character
# between "Cr" and "nin".
grep 'Cr.nin' rank.txt

# Another example of what "." does.
# "/5.:" is a better way of counting people who made a score in the
# 50s than the one used above because we now require some other
# character, denoted by the ".", to follow the number "5" before the
# ":" that ends the score.  This prevents a score of "5" being
# miscounted as a score in the 50s.  "wc -l" prints only the number of
# matching lines.
grep '/5.:' rank.txt | wc -l

# Shows what the characters "[]" do.
# "[7-9]" matches one character out of 7, 8, or 9, so this counts the
# people who have made scores between 57 and 59.
grep '/5[7-9]:' rank.txt | wc -l

# Shows what the characters "[^]" do.
# "[^7-9]" matches one character that is NOT 7, 8, or 9, so this counts
# the scores of the form "/5X:" that are not between 57 and 59 (that
# is, 50 through 56).  Scores outside the 50s are not matched at all.
grep '/5[^7-9]:' rank.txt | wc -l

# Demonstrates "*": the preceding character may occur zero or more
# times.  "ane*l" therefore finds lines containing "anl", "anel",
# "aneel", "aneeel", and so on.
grep -e 'ane*l' rank.txt

# A second "*" demonstration.
# Finds a score of 58 or 59, then any run of arbitrary characters
# (the ".*"), and finally a ranking between 1 and 5 at the very end
# of the line.
grep -e '/5[8-9].*:[1-5]$' rank.txt

# Shows the difference between "*" and "+" in extended regular
# expressions.  "grep -E" is used in place of the deprecated "egrep".
# The first one requires one "1" followed by zero or more further
# "1"s, so it matches a ranking of either 1 or 11 (or 111, ...).
# The second one requires at least one further "1", so it matches a
# ranking of 11 (or 111, ...) but not 1.
grep -E ':11*$' rank.txt
grep -E ':11+$' rank.txt

# Shows more features of extended regular expressions: () |
# This one looks for people who are ranked 5 or 6.
grep -E '(:5|:6)$' rank.txt

# Demonstrates escaping a special character.
# Suppose we want numbers of the form "25.XXX".  In the first command
# the dot is treated as the regular-expression special character that
# stands for any single character, so it does not quite do the job.
# In the second command the backslash ("\") placed in front of the dot
# makes it match only a literal ".".
grep -e '25.' rank.txt
grep -e '25\.' rank.txt




#######################################################################
# More command line options.
#######################################################################

# The manual pages give more information about these commands.
man grep
man egrep


# Demonstrates "-v", which keeps only the lines that do NOT match.
# First select the lines containing "David", then drop those that
# also contain "Stavens".
grep 'David' rank.txt | grep -v 'Stavens'

# Demonstrates "-n", which prefixes each matching line with its line
# number.  In this case, the line number and the ranking coincide.
grep 'crloose' rank.txt
grep -n 'crloose' rank.txt