#!/usr/bin/env python
"""
Defines LineSplitter and helper functions.
-----
Permission to use, modify, and distribute this software is given under the
terms of the NumPy License. See http://scipy.org.
NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
Author: Pearu Peterson <pearu@cens.ioc.ee>
Created: May 2006
-----
"""
# Names exported by ``from <module> import *``.  split2 and the LineSplitter*
# classes defined below are not listed, so they must be imported explicitly.
__all__ = ['String','string_replace_map','splitquote','splitparen']
import re
class String(str):
    """Marker ``str`` subclass used to tag items that are (parts of) quoted strings."""
class ParenString(str):
    """Marker ``str`` subclass used to tag items that are parenthesized groups."""
def split2(line, lower=False):
    """
    Cut ``line`` in two at the first start of a string.

    Returns a 2-tuple ``(head, tail)``.  ``head`` is the leading non-string
    part of the line; ``tail`` is everything from the opening quote onwards,
    or an empty string when the line contains no string start.

    ``lower`` is handed to LineSplitter, which lower-cases the non-string
    part when it is true.
    """
    splitter = LineSplitter(line, lower=lower)
    return splitter.split2()
# Returns every string-constant placeholder (_F2PY_STRING_CONSTANT_<n>_)
# occurring in a piece of text.
_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall
# Succeeds only when the whole text consists of word characters; note that
# the empty string matches as well.
_is_name = re.compile(r'\w*\Z',re.I).match
# Same pattern as _is_name, kept under a separate name for the check applied
# to the content of string constants.
_is_simple_str = re.compile(r'\w*\Z',re.I).match
# Returns every placeholder of either kind (string constant or expression
# tuple) occurring in a piece of text.
_f2py_findall = re.compile(r'(_F2PY_STRING_CONSTANT_\d+_|F2PY_EXPR_TUPLE_\d+)').findall
class string_replace_dict(dict):
    """
    Dictionary object that is callable for applying map returned
    by string_replace_map() function.

    Keys are placeholder symbols (``_F2PY_STRING_CONSTANT_<n>_`` or
    ``F2PY_EXPR_TUPLE_<n>``) and values are the original texts.  Calling
    the dictionary with a line returns the line with every placeholder
    replaced by its original text.  A placeholder that has no entry in
    the dictionary raises KeyError.
    """

    # Matches a complete placeholder.  ``\d+`` is greedy, so
    # F2PY_EXPR_TUPLE_10 is always seen as one key and never as
    # F2PY_EXPR_TUPLE_1 followed by "0".
    _placeholder = re.compile(
        r'(_F2PY_STRING_CONSTANT_\d+_|F2PY_EXPR_TUPLE_\d+)')

    def __call__(self, line):
        # Substitute in a single regex pass.  Chained str.replace() calls
        # would also rewrite the prefix of longer keys (F2PY_EXPR_TUPLE_1
        # inside F2PY_EXPR_TUPLE_10) and would re-scan text that has
        # already been restored.
        return self._placeholder.sub(lambda m: self[m.group(0)], line)
def string_replace_map(line, lower=False,
                       _cache={'index':0,'pindex':0}):
    """
    Replace string constants and parenthesized expressions by placeholders.

    1) Replaces string constants with symbol `'_F2PY_STRING_CONSTANT_<index>_'`
    2) Replaces (expression) with symbol `(F2PY_EXPR_TUPLE_<index>)`

    A string constant is left untouched when its content consists of word
    characters only; likewise a parenthesized group whose content consists
    of word characters only.  Within one call, equal string contents share
    one placeholder, and so do equal (whitespace-stripped) expressions.

    ``lower`` is passed on to splitquote(), which lower-cases the
    non-string parts of the line when it is true.

    ``_cache`` holds the running placeholder counters.  It is a mutable
    default on purpose, so that indices keep growing across calls.

    Returns a new line and the replacement map.  The map is a
    string_replace_dict; calling it restores the original texts.
    """
    string_map = string_replace_dict()

    # Pass 1: string constants.  The reverse map is keyed by the string
    # *content* (delimiters removed) and is looked up with the content too.
    # Looking it up with the delimited item would never find an equal
    # string and could wrongly match an unrelated entry.
    string_keys = {}
    items = []
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            content = item[1:-1]
            key = string_keys.get(content)
            if key is None:
                _cache['index'] += 1
                key = "_F2PY_STRING_CONSTANT_%s_" % (_cache['index'])
                string_map[key] = content
                string_keys[content] = key
            items.append(item[0]+key+item[-1])
        else:
            items.append(item)
    newline = ''.join(items)

    # Pass 2: parenthesized expressions.  They get their own reverse map so
    # that an expression can never be handed a string-constant key.
    expr_keys_by_content = {}
    expr_keys = []
    items = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1]):
            content = item[1:-1].strip()
            key = expr_keys_by_content.get(content)
            if key is None:
                _cache['pindex'] += 1
                key = 'F2PY_EXPR_TUPLE_%s' % (_cache['pindex'])
                string_map[key] = content
                expr_keys_by_content[content] = key
                expr_keys.append(key)
            items.append(item[0]+key+item[-1])
        else:
            items.append(item)
    result = ''.join(items)

    # Pass 3: expression texts were taken from the line that already had
    # its strings replaced, so expand the string placeholders inside them.
    expanded_keys = set()
    for key in expr_keys:
        value = string_map[key]
        inner_keys = _f2py_str_findall(value)
        if inner_keys:
            expanded_keys.update(inner_keys)
            for inner_key in inner_keys:
                value = value.replace(inner_key, string_map[inner_key])
            string_map[key] = value

    # A string key is obsolete only if it no longer occurs in the returned
    # line.  The same string may also appear outside of any parentheses,
    # in which case its entry is still needed to restore the line.
    for inner_key in expanded_keys:
        if inner_key not in result:
            del string_map[inner_key]
    return result, string_map
def splitquote(line, stopchar=None, lower=False, quotechars = '"\''):
    """
    Fast LineSplitter.

    Splits ``line`` into a list of non-string parts (plain ``str``) and
    string parts (``String`` instances, delimiters included).

    ``stopchar`` is the quote character of a string that is still open at
    the start of ``line``, or None when the line starts outside a string.
    ``lower`` requests lower-casing of the non-string parts.
    ``quotechars`` holds the characters that may open a string.  A quote
    preceded by an odd number of backslashes does not open or close a
    string.

    Returns ``(items, stopchar)``; the returned ``stopchar`` is the quote
    character of a string left open at the end of the line, else None.
    """
    pieces = []
    pos = 0
    end = len(line)
    while pos < end:
        ch = line[pos]
        pos += 1
        buf = []
        backslashes = 0

        if stopchar is None:
            # Outside of a string: gather text up to an unescaped quote.
            while True:
                if ch in quotechars and backslashes % 2 == 0:
                    stopchar = ch
                    # Step back so that the next round re-reads the quote
                    # as the first character of the string part.
                    pos -= 1
                    break
                if ch == '\\':
                    backslashes += 1
                else:
                    backslashes = 0
                buf.append(ch)
                if pos >= end:
                    break
                ch = line[pos]
                pos += 1
            if buf:
                text = ''.join(buf)
                if lower:
                    text = text.lower()
                pieces.append(text)
            continue

        if ch == stopchar:
            # The string part begins with its opening quote.
            buf.append(ch)
            if pos >= end:
                pieces.append(String(''.join(buf)))
                break
            ch = line[pos]
            pos += 1

        # Inside a string (freshly opened or continued from before).
        while True:
            if ch == stopchar and backslashes % 2 == 0:
                buf.append(ch)
                stopchar = None
                break
            if ch == '\\':
                backslashes += 1
            else:
                backslashes = 0
            buf.append(ch)
            if pos >= end:
                break
            ch = line[pos]
            pos += 1
        # buf holds at least one character here on every path.
        pieces.append(String(''.join(buf)))
    return pieces, stopchar
class LineSplitterBase:
    """
    Iterator base class for line splitters.

    Subclasses provide ``get_item()``, which returns the next piece of the
    line and raises StopIteration when the line is exhausted.  Iterating
    over an instance yields the non-empty pieces only.
    """

    def __iter__(self):
        return self

    def next(self):
        item = ''
        # Skip empty pieces; the loop ends either with a non-empty piece
        # or with the StopIteration raised by get_item().
        while not item:
            item = self.get_item() # get_item raises StopIteration
        return item

    # Python 3 spells the iterator method __next__; keep both names so the
    # splitters can be iterated under either major version.
    __next__ = next
class LineSplitter(LineSplitterBase):
    """ Splits a line into non strings and strings. E.g.
    abc="123" -> ['abc=','"123"']
    Handles splitting lines with incomplete string blocks.

    ``quotechar`` is the quote character of a string that is still open
    at the start of the line (None when the line starts outside a string).
    After iteration the ``quotechar`` attribute tells whether a string was
    left open.  ``lower`` requests lower-casing of the non-string pieces.
    """

    def __init__(self, line,
                 quotechar = None,
                 lower=False,
                 ):
        # Characters are kept in reverse order, so the next character of
        # the line is always at the end of the list (cheap pop/append).
        chars = list(line)
        chars.reverse()
        self.fifo_line = chars
        self.quotechar = quotechar
        self.lower = lower

    def split2(self):
        """
        Split line until the first start of a string.

        Returns ``(first_item, rest_of_line)``, or ``('', '')`` when there
        is nothing left to split.
        """
        try:
            head = self.get_item()
        except StopIteration:
            return '',''
        # Whatever is still pending, put back into reading order.
        tail = ''.join(reversed(self.fifo_line))
        return head, tail

    def get_item(self):
        """
        Return the next piece of the line.

        A non-string piece is returned as plain ``str`` (possibly empty),
        a string piece as ``String``.  Raises StopIteration when no
        characters are left.
        """
        pending = self.fifo_line
        if not pending:
            raise StopIteration
        ch = pending.pop()
        quote = self.quotechar
        buf = []
        backslashes = 0

        if quote is None:
            # Outside of a string: gather text up to an unescaped quote.
            while True:
                if ch in '"\'' and backslashes % 2 == 0:
                    self.quotechar = ch
                    # Push the quote back; it opens the next piece.
                    pending.append(ch)
                    break
                if ch == '\\':
                    backslashes += 1
                else:
                    backslashes = 0
                buf.append(ch)
                if not pending:
                    break
                ch = pending.pop()
            text = ''.join(buf)
            if self.lower:
                text = text.lower()
            return text

        if ch == quote:
            # The string piece begins with its opening quote.
            buf.append(ch)
            if not pending:
                return String(''.join(buf))
            ch = pending.pop()

        # Inside a string (freshly opened or continued from before).
        while True:
            if ch == quote and backslashes % 2 == 0:
                buf.append(ch)
                self.quotechar = None
                break
            if ch == '\\':
                backslashes += 1
            else:
                backslashes = 0
            buf.append(ch)
            if not pending:
                break
            ch = pending.pop()
        return String(''.join(buf))
def splitparen(line,paren='()'):
    """
    Fast LineSplitterParen.

    Splits ``line`` into a list that alternates between text outside of
    parentheses (plain ``str``) and top-level parenthesized groups
    (``ParenString`` instances, delimiters included).  Nested groups stay
    inside their enclosing group.  ``paren`` supplies the opening and the
    closing character as its first two characters.

    An opening character preceded by an odd number of backslashes does
    not start a group.  A group that directly starts the line or directly
    follows another group is preceded by an empty ``str`` item.
    """
    startchar = paren[0]
    endchar = paren[1]
    closing = None
    pieces = []
    pos = 0
    end = len(line)
    while pos < end:
        ch = line[pos]
        pos += 1
        backslashes = 0
        buf = []

        if closing is None:
            # Outside of a group: gather text up to an unescaped opener.
            while True:
                if ch == startchar and backslashes % 2 == 0:
                    closing = endchar
                    # Step back so that the next round re-reads the opener
                    # as the first character of the group.
                    pos -= 1
                    break
                if ch == '\\':
                    backslashes += 1
                else:
                    backslashes = 0
                buf.append(ch)
                if pos >= end:
                    break
                ch = line[pos]
                pos += 1
            piece = ''.join(buf)
        else:
            # Inside a group: track the nesting depth until the closer
            # that matches the outermost opener is reached.
            depth = 0
            while True:
                if depth == 1 and ch == closing and backslashes % 2 == 0:
                    buf.append(ch)
                    closing = None
                    break
                if ch == '\\':
                    backslashes += 1
                else:
                    backslashes = 0
                if ch == startchar:
                    depth += 1
                elif ch == endchar:
                    depth -= 1
                buf.append(ch)
                if pos >= end:
                    break
                ch = line[pos]
                pos += 1
            piece = ParenString(''.join(buf))
        pieces.append(piece)
    return pieces
class LineSplitterParen(LineSplitterBase):
    """ Splits a line into strings and strings with parenthesis. E.g.
    a(x) = b(c,d) -> ['a','(x)',' = b','(c,d)']

    ``paren`` supplies the opening and the closing character as its first
    two characters.
    """

    def __init__(self, line, paren = '()'):
        # Characters are kept in reverse order, so the next character of
        # the line is always at the end of the list (cheap pop/append).
        chars = list(line)
        chars.reverse()
        self.fifo_line = chars
        self.startchar = paren[0]
        self.endchar = paren[1]
        self.stopchar = None

    def get_item(self):
        """
        Return the next piece of the line.

        Text outside of parentheses is returned as plain ``str`` (possibly
        empty), a top-level parenthesized group as ``ParenString``.
        Raises StopIteration when no characters are left.
        """
        pending = self.fifo_line
        if not pending:
            raise StopIteration
        ch = pending.pop()
        opener = self.startchar
        closer = self.endchar
        closing = self.stopchar
        buf = []
        backslashes = 0

        if closing is None:
            # Outside of a group: gather text up to an unescaped opener.
            while True:
                if ch == opener and backslashes % 2 == 0:
                    self.stopchar = closer
                    # Push the opener back; it starts the next piece.
                    pending.append(ch)
                    break
                if ch == '\\':
                    backslashes += 1
                else:
                    backslashes = 0
                buf.append(ch)
                if not pending:
                    break
                ch = pending.pop()
            return ''.join(buf)

        # Inside a group: track the nesting depth until the closer that
        # matches the outermost opener is reached.
        depth = 0
        while True:
            if depth == 1 and ch == closing and backslashes % 2 == 0:
                buf.append(ch)
                self.stopchar = None
                break
            if ch == '\\':
                backslashes += 1
            else:
                backslashes = 0
            if ch == opener:
                depth += 1
            elif ch == closer:
                depth -= 1
            buf.append(ch)
            if not pending:
                break
            ch = pending.pop()
        return ParenString(''.join(buf))
def test():
    """
    Self-test of the splitters defined in this module.

    Checks that each iterator class and its "fast" function counterpart
    produce the same pieces, then prints the result of
    string_replace_map('a()') followed by 'ok'.  Raises AssertionError on
    the first mismatch.

    Only constructs accepted by both Python 2 and Python 3 are used:
    repr() instead of backticks and print with one parenthesized argument.
    """
    # Complete strings, including an escaped quote outside of a string.
    line = 'abc\\\' def"12\\"3""56"dfad\'a d\''
    expected = ['abc\\\' def','"12\\"3"','"56"','dfad','\'a d\'']
    splitter = LineSplitter(line)
    l = [item for item in splitter]
    assert l==expected,repr(l)
    assert splitter.quotechar is None
    l,stopchar=splitquote(line)
    assert l==expected,repr(l)
    assert stopchar is None

    # A string that is left open at the end of the line.
    splitter = LineSplitter('"abc123&')
    l = [item for item in splitter]
    assert l==['"abc123&'],repr(l)
    assert splitter.quotechar=='"'
    l,stopchar = splitquote('"abc123&')
    assert l==['"abc123&'],repr(l)
    assert stopchar=='"'

    # A string that was opened on a previous line and is closed here.
    splitter = LineSplitter(' &abc"123','"')
    l = [item for item in splitter]
    assert l==[' &abc"','123']
    assert splitter.quotechar is None
    l,stopchar = splitquote(' &abc"123','"')
    assert l==[' &abc"','123']
    assert stopchar is None

    l = split2('')
    assert l==('',''),repr(l)
    l = split2('12')
    assert l==('12',''),repr(l)
    l = split2('1"a"//"b"')
    assert l==('1','"a"//"b"'),repr(l)
    l = split2('"ab"')
    assert l==('','"ab"'),repr(l)

    # Nested and backslash-escaped parentheses.  The backslashes are
    # written explicitly; '\(' is not a valid escape sequence.
    paren_line = 'a(b) = b(x,y(1)) b\\((a)\\)'
    paren_expected = ['a', '(b)', ' = b', '(x,y(1))', ' b\\(', '(a)', '\\)']
    splitter = LineSplitterParen(paren_line)
    l = [item for item in splitter]
    assert l==paren_expected,repr(l)
    l = splitparen(paren_line)
    assert l==paren_expected,repr(l)

    l = string_replace_map('a()')
    print(l)
    print('ok')
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    test()
|