1996-11-28 03:52:01 +08:00
|
|
|
#! /usr/bin/env python
|
1995-04-10 19:40:26 +08:00
|
|
|
|
|
|
|
# 1) Regular Expressions Test
|
2003-04-25 01:13:18 +08:00
|
|
|
#
|
|
|
|
# Read a file of (extended per egrep) regular expressions (one per line),
|
1995-04-10 19:40:26 +08:00
|
|
|
# and apply those to all files whose names are listed on the command line.
|
|
|
|
# Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
|
|
|
|
# against five /etc/termcap files.  Tests using more elaborate patterns
|
|
|
|
# would also be interesting. Your code should not break if given hundreds
|
2003-04-25 01:13:18 +08:00
|
|
|
# of regular expressions or binary files to scan.
|
1995-04-10 19:40:26 +08:00
|
|
|
|
|
|
|
# This implementation:
|
|
|
|
# - combines all patterns into a single one using ( ... | ... | ... )
|
|
|
|
# - reads patterns from stdin, scans files given as command line arguments
|
|
|
|
# - produces output in the format <file>:<lineno>:<line>
|
|
|
|
# - is only about 2.5 times as slow as egrep (though I couldn't run
|
|
|
|
# Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
|
|
|
|
|
|
|
|
import string
|
|
|
|
import sys
|
2003-04-25 01:22:04 +08:00
|
|
|
import re
|
1995-04-10 19:40:26 +08:00
|
|
|
|
|
|
|
def main():
    """Scan the files named on the command line for lines matching a pattern.

    Patterns are read from standard input, one per line, and joined into a
    single alternation ``(p1|p2|...)`` that is compiled once.  Each file in
    ``sys.argv[1:]`` is then read line by line, and every line the combined
    pattern matches is written to standard output as
    ``<file>:<lineno>:<line>``.  A file that cannot be opened is reported on
    standard output as ``<file>: <error>`` and skipped.
    """
    pats = [chomp(raw) for raw in sys.stdin.readlines()]
    bigpat = '(' + '|'.join(pats) + ')'
    prog = re.compile(bigpat)

    # Write through sys.stdout directly rather than the print statement:
    # the bytes produced are the same, and the code is also valid syntax
    # under Python 3.
    write = sys.stdout.write

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError as msg:
            write("%s: %s\n" % (filename, msg))
            continue
        # The with-block closes the file even if scanning raises, so file
        # descriptors are not leaked when many files are given.
        with fp:
            for lineno, line in enumerate(fp, 1):
                if not prog.search(line):
                    continue
                write("%s:%s:%s" % (filename, lineno, line))
                # A final line without a newline would otherwise run into
                # the next record; terminate it explicitly.
                if not line.endswith('\n'):
                    write('\n')
|
1995-04-10 19:40:26 +08:00
|
|
|
|
|
|
|
def chomp(s):
    """Return *s* with all trailing newline characters removed.

    Only ``'\\n'`` is stripped; other trailing whitespace (spaces, tabs,
    carriage returns) is left in place.
    """
    newline = '\n'
    trimmed = s.rstrip(newline)
    return trimmed
|
1995-04-10 19:40:26 +08:00
|
|
|
|
2005-11-09 15:07:58 +08:00
|
|
|
# Run the scan only when this file is executed as a script; importing it
# as a module just defines main() and chomp().
if __name__ == '__main__':
    main()
|