# Copyright (C) 2004 Scott W. Dunlop <sdunlop at users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import re, string
from cStringIO import StringIO
from sets import Set
from urllib import quote_plus
from urllib import unquote_plus
# Matches a run of spaces, tabs, carriage returns and newlines.
whitespace_pattern = re.compile( "[ \t\r\n]+" )

# Punctuation that may occur inside a word.  The capturing group makes
# split() return the separators too, so capitalize() can put them back.
wordflaw_pattern = re.compile( "([-_'])" )

# Words that capitalize() leaves untouched unless capitalization is forced.
# Every entry needs its own comma: adjacent string literals are silently
# concatenated ( "for" "but" would become the single entry "forbut" ).
little_words = Set((
    "a", "an", "the", "or", "and", "to", "for", "but",
))

# Word fragments (the pieces between wordflaw separators) that capitalize()
# always leaves untouched.
little_frags = Set(( "el", "al", "l", "s" ))
def iterpairs( lst ):
    """
    Generate the items of a sequence two at a time, without overlap.

    Yields ( lst[0], lst[1] ), ( lst[2], lst[3] ) and so on.  When the
    sequence has an odd number of items, the final pair is
    ( last_item, None ).  An empty sequence yields nothing.
    """
    count = len( lst )
    for pos in range( 0, count, 2 ):
        follower = pos + 1
        if follower < count:
            yield lst[pos], lst[follower]
        else:
            # Odd item left over at the end: pair it with None.
            yield lst[pos], None
def capitalize( word, force=False ):
    """
    Capitalize a single word for use in a title.

    word  -- the word to capitalize; an empty word is returned unchanged.
    force -- when true, the word is capitalized even if it appears in
             little_words.

    The word is split on the separators matched by wordflaw_pattern and
    each fragment is capitalized on its own, except for fragments listed
    in little_frags, which are kept as they are.  The separators are
    preserved in the result.
    """
    if not word:
        return word
    if not force and word in little_words:
        return word

    pieces = []
    # split() with a capturing group alternates fragment, separator,
    # fragment, ...; iterpairs() hands them over as ( fragment, separator ).
    for frag, sep in iterpairs( wordflaw_pattern.split( word ) ):
        if frag in little_frags:
            pieces.append( frag )
        else:
            # The str method replaces the deprecated string.capitalize().
            pieces.append( frag.capitalize() )
        if sep:
            pieces.append( sep )
    return "".join( pieces )
def normalize( title ):
    """
    Return the canonical form of a title.

    The title is lower-cased and split on whitespace; the first word is
    always capitalized, the remaining words are capitalized subject to the
    rules of capitalize(), and the words are rejoined with single spaces.
    A title without any words yields the empty string.
    """
    words = title.lower().split()
    if not words:
        return ""

    # The leading word is capitalized unconditionally.
    normalized = [ capitalize( words[0], True ) ]
    for follower in words[1:]:
        normalized.append( capitalize( follower ) )
    return " ".join( normalized )
# Characters that htmlquote() replaces, mapped to the entity reference that
# stands for them.  Each value must be the entity, not the character itself,
# otherwise htmlquote() passes markup-significant characters through as-is.
html_entities = {
    '&': '&amp;',
    # "'": '&apos;',  Disabled to work around MSIE the Perilous.
    '"': '&quot;',
    '<': '&lt;'
}
def htmlquote( text ):
    """
    Return text with every character that has an entry in html_entities
    replaced by that entry; all other characters are copied unchanged.
    """
    buf = StringIO()
    emit = buf.write
    lookup = html_entities.get
    for char in text:
        # Fall back to the character itself when no replacement is listed.
        emit( lookup( char, char ) )
    return buf.getvalue()
# Characters that namequote() replaces, each paired with the percent-escape
# written in its place.
name_entities = dict( [
    ( '#', '%23' ),
    ( '/', '%2f' ),
    ( '?', '%3f' ),
    ( '&', '%26' ),
    ( '+', '%2b' ),
    ( '=', '%3d' ),
    ( ' ', '%20' ),
] )
def namequote( text ):
    """
    Return text with every character that has an entry in name_entities
    replaced by its percent-escape; all other characters are copied
    unchanged.
    """
    quoted = StringIO()
    for char in text:
        replacement = name_entities.get( char, char )
        quoted.write( replacement )
    return quoted.getvalue()
# --- Inline markup ---------------------------------------------------------

# Group 1: leading text containing neither "{" nor "["; group 2: the rest
# (at least one character).
lfm_scanner = re.compile( r"([^{[]*)(.+)" )

# "{key}" followed by the remaining text.
link_pattern = re.compile( r"{([^}]+)}(.*)" )

# "{scheme:address optional label}" followed by the remaining text.
farlink_pattern = re.compile( r"{([A-Za-z]+:[^ }]*) *([^}]*)}(.*)" )

# "[scheme:media optional-scheme:link optional text]" followed by the
# remaining text.
media_pattern = re.compile(
    r"\[([A-Za-z]+:[^ \]]*) *([A-Za-z]+:[^ \]]*)? *([^\]]*)](.*)"
)

# --- Line markup; group 1, where present, is the line without its marker ---

bullet_pattern = re.compile( r"^\* ?(.*)$" )        # "* item"
heading_pattern = re.compile( r"^! ?(.*)$" )        # "! heading"
subheading_pattern = re.compile( r"^!! ?(.*)$" )    # "!! subheading"
editorial_pattern = re.compile( r"^/ ?(.*)$" )      # "/ editorial remark"
# Not a raw string: the set holds literal carriage-return and newline
# characters.
break_pattern = re.compile( "^[ \r\n]*$" )          # empty or blank line
code_pattern = re.compile( r"^> ?(.*)$" )           # "> code"
table_pattern = re.compile( r"^\|.*$" )             # "|cell|cell|"
def parse_hazeml( wiki, text, static = False ):
    """
    Render wiki markup to HTML.

    wiki   -- handed to the rendering modes, which use it to look up
              whether linked nodes exist.
    text   -- the markup source; it is processed line by line.
    static -- handed to the rendering modes unchanged.

    Returns a ( html, links ) pair: the generated HTML as a string and the
    Set of link targets collected by the modes while rendering.
    """
    html = StringIO()
    links = Set()
    mode = NormalMode( wiki, html, links, static )
    for line in text.splitlines():
        # Each mode returns the mode that handles the following line.
        # The str method replaces the deprecated string.rstrip().
        mode = mode.line( line.rstrip() )
    # Let the last active mode close whatever it still has open.
    mode.end()
    return html.getvalue(), links
class TextMode( object ):
    """
    Base class of the line-by-line rendering modes.

    A mode is fed one source line at a time through line() and returns the
    mode that must receive the next line: itself, or a newly chained mode.
    Subclasses supply start() and end(); start() is called from the
    constructor and end() from chain().
    """
    __slots__ = 'wiki', 'output', 'links', 'static'

    def __init__( self, wiki, output, links, static = False ):
        """
        wiki   -- object whose getDatabase() result answers nodeExists().
        output -- object whose write() receives the generated HTML.
        links  -- collection whose add() receives each normalized local
                  link target.
        static -- when true, local link targets get an ".html" suffix.
        """
        self.wiki = wiki
        self.output = output
        self.links = links
        self.static = static
        # All attributes are in place before the subclass hook runs.
        self.start()

    def getWiki( self ):
        return self.wiki

    def getDatabase( self ):
        return self.getWiki().getDatabase()

    def getOutput( self ):
        return self.output

    def getLinks( self ):
        return self.links

    def isStatic( self ):
        return self.static

    def write( self, data ):
        """Append data to the output."""
        self.output.write( data )

    def block( self, block ):
        """
        Write a run of inline text, turning media references, far links
        and local links into HTML and escaping everything else.
        """
        rest = block
        while rest:
            scanned = lfm_scanner.match( rest )
            if not scanned:
                self.write( htmlquote( rest ) )
                break

            # Plain text in front of the next "{" or "[" goes out escaped.
            self.write( htmlquote( scanned.group( 1 ) ) )
            rest = scanned.group( 2 )

            found = media_pattern.match( rest )
            if found:
                rest = self._emit_media( found )
                continue

            # Far links are tried before local links because link_pattern
            # would also match them.
            found = farlink_pattern.match( rest )
            if found:
                rest = self._emit_farlink( found )
                continue

            found = link_pattern.match( rest )
            if found:
                rest = self._emit_link( found )
                continue

            # NOTE(review): nothing recognizable starts here, so the whole
            # remainder -- including any valid markup further along -- is
            # written as literal text.
            self.write( htmlquote( rest ) )
            break

    def _emit_media( self, found ):
        """Write the <img> for a media_pattern match; return the text after it."""
        media_url, link_url, alt_text, tail = found.groups()
        if link_url:
            # The optional second address wraps the image in a link.
            self.write( '<a href="' )
            self.write( htmlquote( link_url ) )
            self.write( '">' )
        self.write( '<img src="' )
        self.write( htmlquote( media_url ) )
        self.write( '" alt="' )
        if alt_text:
            self.write( htmlquote( alt_text ) )
        self.write( '" />' )
        if link_url:
            self.write( '</a>' )
        return tail

    def _emit_farlink( self, found ):
        """Write the anchor for a farlink_pattern match; return the text after it."""
        # NOTE(review): farlink_pattern accepts any alphabetic scheme, so
        # "javascript:" addresses end up in href as well -- confirm whether
        # that is acceptable for the sites using this.
        address = found.group( 1 )
        self.write( '<a class="far" href="' )
        self.write( htmlquote( address ) )
        self.write( '">' )
        # Without a label the address doubles as the link text.
        self.write( htmlquote( found.group( 2 ) or address ) )
        self.write( "</a>" )
        return found.group( 3 )

    def _emit_link( self, found ):
        """
        Write the anchor for a link_pattern match, record its normalized
        target in the link collection and return the text after it.
        """
        key = found.group( 1 )
        normal_key = normalize( key )

        self.write( '<a class="' )
        if not self.getDatabase().nodeExists( normal_key ):
            self.write( 'new' )
        else:
            self.write( 'near' )
        self.write( '" href="' )

        target = namequote( normal_key )
        if self.static:
            target = target + ".html"
        self.write( htmlquote( target ) )
        self.write( '">' )
        self.write( htmlquote( key ) )
        self.write( '</a>' )

        self.getLinks().add( normal_key )
        return found.group( 2 )

    def transition( self, line ):
        """
        Return the mode that results from handing line to the special mode
        it belongs to, or None when the line has no special marker.  A
        blank line yields a fresh NormalMode that has not seen the line.
        """
        # Tried in order; subheadings come before headings because
        # heading_pattern also matches lines that start with "!!".
        candidates = (
            ( table_pattern, TableMode ),
            ( code_pattern, CodeMode ),
            ( bullet_pattern, BulletMode ),
            ( subheading_pattern, SubheadingMode ),
            ( heading_pattern, HeadingMode ),
            ( editorial_pattern, EditorialMode ),
        )
        for pattern, mode_class in candidates:
            if pattern.match( line ) is not None:
                return self.chain( mode_class ).line( line )

        if break_pattern.match( line ) is not None:
            return self.chain( NormalMode )
        return None

    def line( self, line ):
        """Handle one line and return the mode for the next one."""
        successor = self.transition( line )
        if successor is None:
            # No marker: the line is ordinary paragraph text.
            successor = self.chain( NormalMode ).line( line )
        return successor

    def chain( self, nextClass ):
        """Close this mode and return a nextClass mode sharing its state."""
        self.end()
        return nextClass(
            self.getWiki(), self.getOutput(), self.getLinks(), self.isStatic()
        )
class NormalMode( TextMode ):
    """
    Paragraph text.  The <p> element is opened lazily by the first text
    line, and every further line of the same paragraph is preceded by a
    <br />.
    """
    __slots__ = 'started',

    def __init__( self, *args ):
        TextMode.__init__( self, *args )
        # Becomes true once "<p>" has been written.
        self.started = False

    def start( self ):
        # Nothing is written up front; see line().
        return

    def end( self ):
        # Only close a paragraph that was actually opened.
        if self.started:
            self.write( "</p>" )

    def line( self, line ):
        successor = self.transition( line )
        if successor is not None:
            return successor

        if self.started:
            self.write( "<br />" )
        else:
            self.started = True
            self.write( "<p>" )
        self.block( line )
        self.write( " " )
        return self
class CodeMode( TextMode ):
    """
    Preformatted code.  Lines matching code_pattern are written, escaped
    but otherwise verbatim, inside <div class="code"><pre>.
    """

    def start( self ):
        self.write( '<div class="code"><pre>' )

    def end( self ):
        self.write( "</pre></div>" )

    def line( self, line ):
        found = code_pattern.match( line )
        if found is None:
            # Not a code line: let the generic dispatch pick the next mode.
            return TextMode.line( self, line )

        # No inline markup processing here, only escaping.
        self.write( htmlquote( found.group( 1 ) ) )
        self.write( "\n" )
        return self
class BulletMode( TextMode ):
    """
    Bulleted list.  Every line matching bullet_pattern becomes one <li>
    of a single <ul>.
    """

    def start( self ):
        self.write( "<ul>" )

    def end( self ):
        self.write( "</ul>" )

    def line( self, line ):
        found = bullet_pattern.match( line )
        if found is None:
            # Not a bullet: let the generic dispatch pick the next mode.
            return TextMode.line( self, line )

        self.write( "<li>" )
        self.block( found.group( 1 ) )
        self.write( "</li>" )
        return self
class HeadingMode( TextMode ):
    """
    Heading, rendered as <h3>.  Consecutive heading lines are written into
    the same element, and blank lines do not end the mode.
    """

    def start( self ):
        self.write( "<h3>" )

    def end( self ):
        self.write( "</h3>" )

    def line( self, line ):
        # Checked first: heading_pattern would also match a "!!" line.
        if subheading_pattern.match( line ) is not None:
            return self.chain( SubheadingMode ).line( line )

        found = heading_pattern.match( line )
        if found is not None:
            self.block( found.group( 1 ) )
            return self

        # A blank line is swallowed without leaving the heading.
        if break_pattern.match( line ) is not None:
            return self

        return TextMode.line( self, line )
class SubheadingMode( TextMode ):
    """
    Subheading, rendered as <h4>.  Consecutive subheading lines are
    written into the same element, and blank lines do not end the mode.
    """

    def start( self ):
        self.write( "<h4>" )

    def end( self ):
        self.write( "</h4>" )

    def line( self, line ):
        found = subheading_pattern.match( line )
        if found is not None:
            self.block( found.group( 1 ) )
            return self

        # A blank line is swallowed without leaving the subheading.
        if break_pattern.match( line ) is not None:
            return self

        return TextMode.line( self, line )
class EditorialMode( TextMode ):
    """
    Editorial remarks, rendered as <p class='editorial'>.  In static mode
    the remarks are consumed but nothing at all is written for them.
    """

    def start( self ):
        if self.isStatic():
            return
        self.write( "<p class='editorial'>" )

    def end( self ):
        if self.isStatic():
            return
        self.write( "</p>" )

    def line( self, line ):
        found = editorial_pattern.match( line )
        if found is None:
            return TextMode.line( self, line )

        if not self.isStatic():
            # Added space to break multilines.
            self.block( found.group( 1 ) + " " )
        # The line is consumed in static mode too, just without output.
        return self
class TableMode( TextMode ):
    """
    Table.  Every line matching table_pattern becomes one <tr>, with its
    cells separated by "|".  The first row gets the classes "top odd";
    the rows after it alternate between "even" and "odd".
    """

    def __init__( self, *args, **kwargs ):
        # Row state is set before TextMode.__init__ runs, because that
        # constructor already calls start().
        self.evenRow = False    # true when the next row is an even one
        self.topRow = True      # true until the first row has been written
        TextMode.__init__( self, *args, **kwargs )

    def start( self ):
        self.write( '<table cellspacing="0">' )

    def end( self ):
        self.write( "</table>" )

    def line( self, line ):
        if table_pattern.match( line ) is None:
            # Not a table row: let the generic dispatch pick the next mode.
            return TextMode.line( self, line )

        if self.topRow:
            self.write( '<tr class="top odd">' )
            self.topRow = False
            # The row after the top one is the second row, hence even.
            # This has to be stored on the instance; a plain local would
            # be lost and the second row would come out "odd" again.
            self.evenRow = True
        elif self.evenRow:
            self.write( '<tr class="even">' )
            self.evenRow = False
        else:
            self.write( '<tr class="odd">' )
            self.evenRow = True

        # Drop the leading "|" and, when present, the closing one, so that
        # splitting does not produce spurious empty cells at the edges.
        line = line.strip()
        if line[-1] == "|":
            line = line[1:-1]
        else:
            line = line[1:]

        for cell in line.split( "|" ):
            self.write( "<td>" )
            if cell:
                self.block( cell )
            else:
                # Give an empty cell some content of its own; a bare
                # space would be collapsed by the browser.
                self.write( "&nbsp;" )
            self.write( "</td>" )
        self.write( "</tr>" )
        return self
|