# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            s = ''  # comment: discard the rest of the line
        elif c == '"':
            # string literal: scan to the closing quote,
            # stepping over backslash-escaped characters
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            tokens.append(c)
            s = s[1:]
        else:
            # ordinary atom: runs until the next separator
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
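

# A minimal usage sketch (an illustrative addition, not part of the
# original module): operator characters split tokens even without
# whitespace, and ';' discards the rest of the line.
def _demo_tokenize_string():
    assert tokenize_string("(a(b c)) ; comment") == \
        ['(', 'a', '(', 'b', 'c', ')', ')']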


# Tokenize a whole file (given as a file object, not as a file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        tokens = tokens + tokenize_string(line)
    return tokens
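

# A usage sketch with an in-memory file; io.StringIO is an assumption of
# this example, the module itself only needs an object with readline().
def _demo_tokenize_file():
    import io
    fp = io.StringIO('(hip\n(hop hur-ray))\n')
    assert tokenize_file(fp) == \
        ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']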


# Exception raised by parse_expr().
#
class syntax_error(Exception): pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]
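

# A sketch of the (expr, tokens) contract: parse_expr() consumes one
# parenthesized expression and returns the unconsumed tail (an
# illustrative addition, not part of the original module).
def _demo_parse_expr():
    expr, rest = parse_expr(['(', 'a', '(', 'b', ')', ')', 'tail'])
    assert expr == ['a', ['b']]
    assert rest == ['tail']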


# Parse a file (given as a file object, not as a file name).
# Return a list of the parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist
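

# A usage sketch: multiple top-level expressions come back as a list
# (io.StringIO again assumed just for the example).
def _demo_parse_file():
    import io
    fp = io.StringIO('(a)\n(b c)\n')
    assert parse_file(fp) == [['a'], ['b', 'c']]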


# EXAMPLE:
#
# The input
#   '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#   ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#   ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#   [['hip', ['hop', 'hur-ray']]]
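

# The walkthrough above as executable code (an illustrative addition):
def _demo_example():
    tokens = tokenize_string('(hip (hop hur-ray))')
    assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
    expr, rest = parse_expr(tokens)
    assert expr == ['hip', ['hop', 'hur-ray']]
    assert rest == []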


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#   ( ) '
#
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
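

# A runnable self-check of the rules above (an illustrative addition; the
# original module has no test code). A quoted string with a backslash-
# escaped '"' stays a single token, and ';' discards the rest of the line.
if __name__ == '__main__':
    assert tokenize_string('(say "hi \\" there") ; ignored') == \
        ['(', 'say', '"hi \\" there"', ')']
    print('parser.py self-checks passed')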