tinyTextile.py :  » Content-Management-Systems » PyLucid » PyLucid_standalone » pylucid_project » apps » pylucid » markup » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Content Management Systems » PyLucid 
PyLucid » PyLucid_standalone » pylucid_project » apps » pylucid » markup » tinyTextile.py
# -*- coding: utf-8 -*-

"""
    tinyTextile
    ~~~~~~~~~~~

    PyLucid builtin markup engine. Based on the textile markup.

    ToDo
    ~~~~
    Most parts work well, but the code is very old and processing the text
    as a stream was not the best idea: e.g. source code parts don't work
    well; they should be extracted before the markup is applied and
    re-inserted afterwards. So this engine should be completely rewritten.
    other todos:
      * lists are not well indented

    links
    ~~~~~
    http://www.pylucid.org/_goto/5/Markup/
    http://dealmeida.net/en/Projects/PyTextile/
    http://www.solarorange.com/projects/textile/mtmanual_textile2.htm

    Last commit info:
    ~~~~~~~~~~~~~~~~~
    $LastChangedDate:2008-05-13 18:26:55 +0200 (Di, 13 Mai 2008) $
    $Rev:1561 $
    $Author:JensDiemer $

    :copyleft: 2007-2008 by the PyLucid team, see AUTHORS for more details.
    :license: GNU GPL v3 or above, see LICENSE for more details.
"""


__version__ = "$Rev:1561 $"

import sys, re

from xml.sax.saxutils import escape


class TinyTextileParser:
    def __init__(self, out_obj, page_msg):
        self.out = out_obj
        self.page_msg = page_msg

        # Blockelements
        self.block_rules = self._compile_rules([
            [ # <h1>-Headlines
                r"\Ah(\d)\. (.+)(?usm)",
                r"<h\1>\2</h\1>"
            ],
        ])

        # Inlineelements
        self.inline_rules = self._compile_rules([
            [ # HTML-Escaping
                r"={2,2}(.+?)={2,2}(?usm)",
                self.escaping
            ],
            [ # Kleiner Text - Bsp.: Ich bin ein --kleines-- Wort.
                r"-{2,2}([^-]+?)-{2,2}",
                r"<small>\1</small>"
            ],
            [ # Fettschrift - Bsp.: Das Wort ist in *fett* geschrieben.
                r"\*([^*\n]+?)\*(?uism)",
                r"<strong>\1</strong>"
            ],
            [ # manuell linebreak
                r"\\{2,2}",
                r"<br />"
            ],
            [ # img-Tag - Bsp.: !/Bilder/MeinBild.jpg!
                r'\!([^!\n ]+?)\!(?uis)',
                r'<img src="\1">'
            ],
            [ # Link + LinkText - e.g.: "LinkText":http://www.beispiel.de
              # old link format!
                r'"([^"]+?)":([^\s\<]+)',
                r'<a href="\2">\1</a>'
            ],
            [ # Link + LinkText - e.g.: "LinkText":http://www.beispiel.de
              # new link format - e.g.: [http://domain.dtl link text]
                r'\[([^\s\<]+) (.+?)\]',
                r'<a href="\1">\2</a>'
            ],
            [ # interne PyLucid Links - Bsp.:
              # Das ist ein [[InternerLink]] zur Seite InternerLink ;)
                r'\[\[(.+?)\]\]',
                self.shortcutLink
            ],
            [
                # Links allein im Text
                # Bsp.: Das wird ein Link: http://www.beispiel.de
                r'''
                    (?<!=") # Ist noch kein HTML-Link
                    (?P<url>(http|ftp|svn|irc)://([^\s\<]+))
                    (?uimx)
                ''',
                r'<a href="\g<url>">\g<url></a>'
            ],
            [ # EMails
                r'mailto:([^\s\<]+)',
                r'<a href="mailto:\1">\1</a>'
            ],
        ])

        # Pre-Process Regeln
        self.pre_process_rules = self._compile_rules([
            [ # Text vor einer "*"-Liste mit noch einem \n trennen
                r"""
                    (^[^*\n].+?$) # Text-Zeile vor einer Liste
                    (\n^\*) # Absatz + erstes List-Zeichen
                    (?uimx)
                """,
                r"\1\n\2",
            ],
            [ # Text vor einer "#"-Liste mit noch einem \n trennen
                r"""
                    (^[^#\n].+?$) # Text-Zeile vor einer Liste
                    (\n^\#) # Absatz + erstes List-Zeichen
                    (?uimx)
                """,
                r"\1\n\2",
            ],
            [ # insert \n before and after a "|"-Table
                r"(?ms)(^\|.*?\|\n(?!\|))", # match the complete table block
                r"\n\1\n",
            ],
            [
                # Text *vor* einem <pre>, <python> oder <code> Block mit noch
                # einem \n trennen
                r"\n(?P<tag><(pre|python|code[^>]*?)>)\n",
                r"\n\n\g<tag>\n",
            ],
            [
                # Text *nach* einem <pre>, <python> oder <code> Block mit noch
                # einem \n trennen
                r"\n(?P<tag></(pre|python|code)>)\n",
                r"\n\g<tag>\n\n",
            ],
#            [
#                "(?ms)(<(pre|python|code[^>]*?)>.*?</(pre|python|code)>)",
#                r"\n\1\n"
#            ]
        ])

        self.area_rules = (
            [
                "==", "==",
                self.escape_area_start, self.escape_area, self.escape_area_end
            ],
            [
                "<pre>", "</pre>",
                self.pre_area, self.pre_area, self.pre_area
            ],
            [
                "<python>", "</python>",
                self.python_area_start, self.python_area, self.python_area_end
            ],
            [
                "<code", "</code>",
                self.code_area_start, self.code_area, self.code_area_end
            ],
        )

    def _compile_rules(self, rules):
        "Kompliliert die RE-Ausdrcke"
        for rule in rules:
            rule[0] = re.compile(rule[0])
        return rules

    def parse(self, txt):
        "Parsed den Text in's out_obj"
        txt = self.pre_process(txt)
        self.make_paragraphs(txt)

    def escaping(self, matchobj):
        return escape(matchobj.group(1))

    def shortcutLink(self, matchobj):
        shortcut = matchobj.group(1)
        url = "/%s/" % shortcut.strip("/")
        link = '<a href="%s">%s</a>' % (
            url, shortcut
        )
        return link

    def pre_process(self, txt):
        "Vorab Verarbeitung des Textes"

        # Zeilenenden vereinheitlichen
        txt = txt.replace("\r\n", "\n").replace("\r", "\n")

        # Leerzeilen vorn und hinten abschneiden
        txt = txt.strip()

        # Preprocess rules anwenden
        for rule in self.pre_process_rules:
            #~ self.page_msg(rule)
            #~ self.page_msg(txt)
            txt = rule[0].sub(rule[1], txt)
            #~ self.page_msg(txt)

        return txt

    def make_paragraphs(self, txt):
        """
        Verarbeitung des Textes.
        Wendet Blockelement-Regeln und Inlineelement-Regeln an.
        """
        blocks = re.split("\n{2,}", txt)
        #~ self.page_msg(escape(str(blocks)))
        current_area = None
        for block in blocks:
            current_area = self.handle_areas(block, current_area)
            if current_area != None:
                # Wir sind in einer Area und der Block wurde schon abgehandelt
                continue

            block = block.strip()
            if len(block) == 0:
                continue

            #~ if self.is_html.findall(block) != []:
            if block[0] == "<":
                # Der Block scheint schon HTML-Code zu sein
                self.out.write("%s\n" % block)
                #~ self.page_msg("Is HTML:", escape(block))
                continue

            # inline-rules Anwenden
            for inlinerule in self.inline_rules:
                block = inlinerule[0].sub(inlinerule[1], block)

            # Block-rules Anwenden
            self.blockelements(block)

    #_________________________________________________________________________
    # Areas

    def handle_areas(self, block, current_area):
        """
        Areas anhandeln
        """
        #~ self.page_msg(current_area, "--", escape(block))

        def handle_end(current_area, block):
            if block.endswith(current_area[1]):
                # Die aktuelle Area ist zuende
                inner_block = block[:-len(current_area[1])].rstrip()
                # Erstmal die restlichen Daten verabeiten
                current_area[3](inner_block)

                current_area[4](current_area[1]) # Endmethode aufrufen
                return False

        if (current_area != None) and (current_area != False):
            # Wir sind gerade in einer area

            if handle_end(current_area, block) == False:
                # Ende erreicht
                return False

            # Methode die fr die area zustndig ist aufrufen
            current_area[3]("\n%s\n" % block)

            # In der area bleiben
            return current_area

        #~ self.page_msg("handle:", escape(block))
        for current_area in self.area_rules:
            #~ self.page_msg(escape(current_area[0]), block)
            if block.startswith(current_area[0]): # Start einer neuen area
                area_tag = current_area[0]

                # Area-Start-Methode aufrufen
                current_area[2](area_tag)

                rest_block = block[len(area_tag):]
                try:
                    if rest_block[0] == "\n":
                        # Evtl. vorhandene Leerzeile ignorieren
                        rest_block = rest_block[1:]
                except IndexError:
                    # Es ist ein Leerzeichen zwischem Tag und Inhalt
                    # (kommt selten vor)
                    pass

                if handle_end(current_area, rest_block) == False:
                    # Das Ende schon erreicht
                    return False

                # Das Ende ist noch nicht erreicht, also
                # den Restlichen Block durch die normale Methode jagen
                current_area[3](rest_block)

                # In-der-Area-Methode "merken"
                return current_area

        # Wir sind nicht in einer Area
        return None

    #_________________________________________________________________________

    def escape_area_start(self, block):
        self.escape_area_first_line = True
        pass

    def escape_area(self, block):
        if self.escape_area_first_line == True:
            block = block.strip()
            self.escape_area_first_line = False

        block = block.splitlines()
        block = "".join(["%s<br />\n" % escape(line) for line in block])
        self.out.write(block)

    def escape_area_end(self, block):
        pass

    #_________________________________________________________________________

    def pre_area(self, block):
        self.out.write(block + "\n")

    #_________________________________________________________________________

    def python_area_start(self, block):
        """
        Python-Source-Code area
        """
        self.sourcecode_data = []

    def python_area(self, block):
        self.sourcecode_data.append(block)
        if not block.endswith("\n"):
            self.sourcecode_data.append("\n")

    def python_area_end(self, dummy):
        self.hightlight("python", self.sourcecode_data)

    #_________________________________________________________________________

    def code_area_start(self, block):
        """
        Sourcecode mit pygments
        """
        self.first_sourcecode_block = True
        self.sourcecode_type = None
        self.sourcecode_data = []

    def code_area(self, block):
        if self.first_sourcecode_block:
            # Aus der ersten Zeile den Typ des Sourcecodes ermitteln:
            # <code=sql> oder <code=.sh> oder <code>
            self.first_sourcecode_block = False
            code_type, block = block.split(">", 1)
            self.sourcecode_type = code_type.lstrip("=.")

        self.sourcecode_data.append("\n%s\n" % block)

    def code_area_end(self, dummy):
        """
        Wir sind, beim Endtag angekommen, dann zeigen wir mal den
        sourcecode... :)
        """
        self.hightlight(self.sourcecode_type, self.sourcecode_data)

    #_________________________________________________________________________

    def table(self, text):
        result = ""
        for line in text.splitlines():
            line = line.strip("|").split("|")
            result_line = ""
            for cell in line:
                if cell.startswith("="):
                    tag = "th"
                    cell = cell[1:]
                else:
                    tag = "td"
                cell = cell.strip()
                result_line += "\t<%(t)s>%(c)s</%(t)s>\n" % {
                    "t": tag, "c": cell
                }

            result += "<tr>\n%s</tr>\n" % result_line

        result = '<table>\n%s</table>\n' % result
        self.out.write(result)

    #_________________________________________________________________________

    def hightlight(self, source_type, code_lines):
        """
        Write the collected source code as HTML.

        The chunks in `code_lines` are joined and stripped, then rendered by
        make_html() from the PyLucid hightlighter module.
        """
        source = "".join(code_lines).strip()

        from pylucid_project.apps.pylucid.markup.hightlighter import make_html
        rendered = make_html(source, source_type, django_escape=True)
        self.out.write(rendered)

    #_________________________________________________________________________

    def blockelements(self, block):
        "Anwenden der Block-rules. Formatieren des Absatzes"

        if block[0] in ("*", "#"):
            # Aktueller Block ist eine Liste
            self.build_list(block)
            return

        if block[0] == "|":
            # current block is a table
            self.table(block)
            return

        for rule in self.block_rules:
            txt, count = rule[0].subn(rule[1], block)

            if count != 0:
                # Ein Blockelement wurde gefunden
                self.out.write("%s\n" % txt)
                return

        # Kein Blockelement gefunden -> Formatierung des Absatzes
        block = block.strip().replace("\n", "<br />\n")
        self.out.write("<p>%s</p>\n" % block)

    def build_list(self, listitems):
        "Erzeugt eine Liste aus einem Absatz"

        def spacer(deep):
            return " " * (deep * 3)

        def write(number, tag, spacer):
            for i in range(number):
                self.out.write(spacer + tag)

        deep = 0
        for item in re.findall("([\*#]+) (.*)", listitems):
            currentlen = len(item[0])
            if item[0][0] == "*":
                # normale Aufzhlungsliste
                pre_tag = "<ul>\n"
                post_tag = "</ul>\n"
            else:
                # Nummerierte Liste
                pre_tag = "<ol>\n"
                post_tag = "</ol>\n"

            if currentlen > deep:
                write(currentlen - deep, pre_tag, spacer(deep))
                deep = currentlen
            elif currentlen < deep:
                write(deep - currentlen, post_tag, spacer(deep))
                deep = currentlen

            self.out.write(
                "%s<li>%s</li>\n" % (spacer(deep), item[1])
            )

        for i in range(deep):
            self.out.write(post_tag)

if __name__ == "__main__":
    # Quick manual test: render a small sample text to stdout.
    from tests.utils.FakeRequest import get_fake_context

    parser = TinyTextileParser(sys.stdout, get_fake_context())
    parser.parse(r"""
        a windows path:
        C:\windows\foo\bar
        a linux path:
        /usr/bin/python
        a manuel linebreak\\with two backslashes
    """)

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.