xbel_parser.py :  » Network » Grail-Internet-Browser » grail-0.6 » bookmarks » formats » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Grail Internet Browser 
Grail Internet Browser » grail 0.6 » bookmarks » formats » xbel_parser.py
"""Parser for XML bookmarks using the XBEL DTD."""

__version__ = '$Revision: 1.10 $'


import bookmarks
import bookmarks.iso8601
import bookmarks.nodes
import string


class CaptureError(Exception):
    """Error raised when the capture methods are called out of sequence.

    The message is available both as the ``msg`` attribute and through
    the normal exception machinery (``str(err)`` / ``err.args``).
    """

    def __init__(self, msg):
        self.msg = msg
        # Hand the message to the base class as well; otherwise str(err)
        # is empty and tracebacks show no explanation.
        Exception.__init__(self, msg)


class CaptureMixin:
    """Mixin that records nested markup as ``[tag, attrs, content]`` lists.

    ``content`` is a list holding text strings and nested element lists
    of the same three-item shape.  A capture is opened with
    capture_bgn(), fed through capture_data() / capture_starttag() /
    capture_endtag(), and collected with capture_end() once the element
    that opened it has been closed.
    """

    # Nesting depth of the capture in progress; 0 means no capture is
    # active.  capture_bgn() sets it to 1, every captured start tag adds
    # one and every captured end tag removes one.
    __capturing = 0

    def __init__(self):
        pass

    def unknown_starttag(self, tag, attrs):
        """Record a start tag if a capture is active; otherwise ignore it."""
        if not self.__capturing:
            return
        self.capture_starttag(tag, attrs)

    def unknown_endtag(self, tag):
        """Record an end tag if a capture is active; otherwise ignore it."""
        if not self.__capturing:
            return
        self.capture_endtag(tag)

    def capturing(self):
        """Return 1 while a capture is in progress, otherwise a false value."""
        depth = self.__capturing
        if depth:
            return 1
        return depth

    def capture_bgn(self, tag, attrs):
        """Open a new capture rooted at an element `tag` with `attrs`.

        Raises CaptureError if a capture is already in progress.
        """
        if self.__capturing:
            raise CaptureError("capturing already in progress")
        children = []
        self.__capture = [tag, attrs, children]
        # The context stack holds the content list of every open element;
        # its last entry is where new text and elements are appended.
        self.__context = [children]
        self.__capturing = 1

    def capture_end(self, normalize=0):
        """Return the captured root element.

        With a true `normalize`, the tree is passed through
        normalize_capture() first.  Raises CaptureError if the capture
        has not been closed yet.
        """
        if self.__capturing:
            raise CaptureError("capturing not complete")
        captured = self.__capture
        if not normalize:
            return captured
        return normalize_capture(captured)

    def capture_data(self, data):
        """Append character data to the innermost open element.

        Raises CaptureError if no capture is active.
        """
        if not self.__capturing:
            raise CaptureError("capturing not active")
        siblings = self.__context[-1]
        # Merge with a preceding text node so that consecutive chunks of
        # character data end up as a single string.
        if siblings and type(siblings[-1]) is type(""):
            siblings[-1] = siblings[-1] + data
        else:
            siblings.append(data)

    def capture_starttag(self, tag, attrs):
        """Open a nested element inside the innermost open element.

        Raises CaptureError if no capture is active.
        """
        if not self.__capturing:
            raise CaptureError("capturing not active")
        children = []
        self.__context[-1].append([tag, attrs, children])
        self.__context.append(children)
        self.__capturing += 1

    def capture_endtag(self, tag):
        """Close the innermost open element and return the remaining depth.

        A return value of 0 means the element that started the capture
        has just been closed.  Raises CaptureError if no capture is
        active.
        """
        if not self.__capturing:
            raise CaptureError("capturing not active")
        self.__capturing -= 1
        self.__context.pop()
        return self.__capturing


def normalize_capture(data, preserve=0, StringType=type("")):
    """Remove insignificant whitespace from a captured element tree.

    `data` is a ``[tag, attrs, content]`` list in which ``content`` holds
    text strings and nested element lists of the same shape.  The tree
    is modified in place and also returned.

    For each element, processed breadth-first:

    * Whitespace is kept untouched when `preserve` is true, when the
      element carries ``xml:space="preserve"``, or when an ancestor was
      treated as preserving.
    * Otherwise blank text at the start and end of the content is
      dropped.  If any non-blank text remains the element is treated as
      mixed content and becomes preserving for its descendants; if not,
      it is treated as element-only and all remaining (blank) text is
      removed.

    `StringType` is the exact type used to recognise text items.
    """
    queue = [(data, preserve)]
    while queue:
        (tag, attrs, content), preserve = queue.pop(0)
        preserve = preserve or attrs.get("xml:space") == "preserve"
        if not preserve:
            # Drop blank text at the front ...
            while (content and type(content[0]) is StringType
                   and not content[0].strip()):
                del content[0]
            # ... and at the back.
            while (content and type(content[-1]) is StringType
                   and not content[-1].strip()):
                del content[-1]
            # Any non-blank text left means mixed content, whose inner
            # whitespace must be kept.
            for citem in content:
                if type(citem) is StringType and citem.strip():
                    preserve = 1
                    break
        if not preserve:
            # Element-only content: every remaining string is blank.
            # Slice-assign so the list object itself stays the same.
            content[:] = [citem for citem in content
                          if type(citem) is not StringType]
        for citem in content:
            if type(citem) is not StringType:
                queue.append((citem, preserve))
    return data


class DocumentHandler:
    """Build a tree of bookmark nodes from XBEL element events.

    The ``start_<element>`` / ``end_<element>`` methods are named after
    XBEL elements and create ``bookmarks.nodes`` objects as the document
    is read.  The class relies on ``capturing()``, ``capture_bgn()``,
    ``capture_end()``, ``capture_data()``, ``capture_starttag()`` and
    ``capture_endtag()`` being supplied by another base class of the
    concrete parser (CaptureMixin in this file).
    """

    # Folder that currently receives new children.
    __folder = None
    # Node (folder or bookmark) that the next <title> applies to.
    __store_node = None
    # Bookmark currently being read, or None outside <bookmark>.
    __node = None
    # Character data accumulated between save_bgn() and save_end().
    __buffer = ""

    def __init__(self, filename):
        """Create the handler; `filename` is only used in error reports."""
        self.__filename = filename
        self.__context = []       # stack of enclosing folders
        self.__idmap = {}         # id -> node, for resolving aliases
        self.__missing_ids = {}   # id -> aliases waiting for that id
        self.__root = self.new_folder()

    def get_root(self):
        """Return the root folder created by the constructor."""
        return self.__root

    def start_xbel(self, attrs):
        root = self.get_root()
        self.__store_date(root, attrs, "added", "set_add_date")
        self.handle_id(root, attrs)

    def end_xbel(self):
        pass

    def start_folder(self, attrs):
        self.new_folder(attrs)

    def end_folder(self):
        # Return to the enclosing folder.  The <title> target is cleared
        # so it cannot be applied to the folder that was just closed.
        self.__store_node = None
        self.__folder = self.__context[-1]
        del self.__context[-1]

    def start_title(self, attrs):
        self.save_bgn()

    def end_title(self):
        self.__store_node.set_title(self.save_end())

    def start_bookmark(self, attrs):
        self.new_bookmark(attrs)
        node = self.__node
        self.handle_id(node, attrs)
        node.set_uri(attrs.get("href", "").strip())
        self.__store_date(node, attrs, "added",    "set_add_date")
        self.__store_date(node, attrs, "visited",  "set_last_visited")
        self.__store_date(node, attrs, "modified", "set_last_modified")

    def end_bookmark(self):
        self.__node = None
        self.__store_node = None

    def start_desc(self, attrs):
        self.save_bgn()

    def end_desc(self):
        # A description belongs to the open bookmark if there is one,
        # otherwise to the current folder.  Empty descriptions are dropped.
        desc = self.save_end().strip()
        if desc:
            if self.__node:
                self.__node.set_description(desc)
            else:
                self.__folder.set_description(desc)

    def start_alias(self, attrs):
        alias = bookmarks.nodes.Alias()
        self.handle_idref(alias, attrs)
        self.__folder.append_child(alias)

    def end_alias(self):
        pass

    def start_separator(self, attrs):
        self.__folder.append_child(bookmarks.nodes.Separator())

    def end_separator(self):
        pass

    # metadata methods:

    def start_info(self, attrs):
        pass

    def end_info(self):
        pass

    def start_metadata(self, attrs):
        # Everything inside <metadata> is recorded verbatim rather than
        # interpreted.
        self.capture_bgn("metadata", attrs)

    def end_metadata(self):
        metadata = self.capture_end(normalize=1)
        if not metadata[-1]:
            # Nothing left after normalization; do not store it.
            return
        # Attach to the open bookmark, or to the current folder when the
        # metadata is not inside a bookmark (same rule as end_desc()).
        # NOTE(review): assumes folder nodes offer the same
        # info()/set_info() API as bookmarks -- confirm in bookmarks.nodes.
        node = self.__node
        if node is None:
            node = self.__folder
        info = node.info()
        if info is None:
            info = []
            node.set_info(info)
        info.append(metadata)

    # support methods:

    def new_bookmark(self, attrs):
        """Create a bookmark in the current folder and make it current."""
        self.__node = bookmarks.nodes.Bookmark()
        self.__store_node = self.__node
        self.__folder.append_child(self.__node)
        return self.__node

    def new_folder(self, attrs=None):
        """Create a folder inside the current one and make it current.

        `attrs` is the attribute mapping of the ``<folder>`` element; the
        ``folded``, ``added`` and ``id`` attributes are honoured.  The
        new folder is returned.
        """
        if attrs is None:
            attrs = {}
        if self.__folder is not None:
            self.__context.append(self.__folder)
        folded = attrs.get("folded", "no").lower() == "yes"
        self.__folder = bookmarks.nodes.Folder()
        self.__store_node = self.__folder
        if self.__context:
            self.__context[-1].append_child(self.__folder)
        if folded:
            self.__folder.collapse()
        else:
            self.__folder.expand()
        self.__store_date(self.__folder, attrs, "added", "set_add_date")
        self.handle_id(self.__folder, attrs)
        return self.__folder

    def handle_id(self, node, attrs, attrname="id", required=0):
        """Register `node` under the id found in `attrs`, if any.

        Aliases that referred to this id before it was seen are bound to
        `node`.  With a true `required`, a missing attribute raises a
        bookmark format error.
        """
        node_id = attrs.get(attrname)
        if node_id:
            node.set_id(node_id)
            self.__idmap[node_id] = node
            waiting = self.__missing_ids.get(node_id)
            if waiting is not None:
                # NOTE(review): uses set_refnode(), the method
                # handle_idref() uses for the same binding; the original
                # called set_idref() here -- confirm against
                # bookmarks.nodes.Alias.
                for alias in waiting:
                    alias.set_refnode(node)
                del self.__missing_ids[node_id]
        elif required:
            # NOTE(review): BookmarkFormatError was referenced without
            # being imported; presumably it lives in the bookmarks
            # package -- confirm.
            raise bookmarks.BookmarkFormatError(
                self.__filename, "missing %s attribute" % attrname)

    def handle_idref(self, node, attrs, attrname="ref", required=1):
        """Bind alias `node` to the node whose id is named in `attrs`.

        If that id has not been seen yet, the alias is remembered and
        bound later by handle_id().  With a true `required`, a missing
        attribute raises a bookmark format error.
        """
        idref = attrs.get(attrname)
        if idref:
            target = self.__idmap.get(idref)
            if target is not None:
                node.set_refnode(target)
            else:
                self.__missing_ids.setdefault(idref, []).append(node)
        elif required:
            # NOTE(review): see handle_id() regarding the exception's
            # location in the bookmarks package.
            raise bookmarks.BookmarkFormatError(
                self.__filename, "missing %s attribute" % attrname)

    def __store_date(self, node, attrs, attrname, nodefuncname):
        # Parse the ISO 8601 value of attribute `attrname` and pass it to
        # the method of `node` named `nodefuncname`.  Missing or
        # unparsable values are ignored.
        date = attrs.get(attrname)
        if date:
            func = getattr(node, nodefuncname)
            try:
                date = bookmarks.iso8601.parse(date)
            except ValueError:
                return
            func(date)

    def __normalize_metadata(self, metadata):
        # Previously called a helper that does not exist; delegate to
        # the module-level normalizer, which works in place and returns
        # its argument.
        return normalize_capture(metadata)

    def save_bgn(self):
        """Start collecting character data into a fresh buffer."""
        self.__buffer = ""

    def save_end(self):
        """Return the collected text with whitespace runs collapsed.

        Leading and trailing whitespace is removed, each internal run
        becomes a single space, and the buffer is reset.
        """
        s, self.__buffer = self.__buffer, ""
        return " ".join(s.split())

    def handle_data(self, data):
        if self.capturing():
            self.capture_data(data)
        else:
            self.__buffer = self.__buffer + data

    def handle_starttag(self, tag, method, attrs):
        # While capturing, known elements are recorded like any other
        # markup instead of being dispatched to their handler.
        if self.capturing():
            self.capture_starttag(tag, attrs)
            return
        method(attrs)

    def handle_endtag(self, tag, method):
        # capture_endtag() returns 0 once the element that started the
        # capture is closed; only then is its end handler invoked.
        if self.capturing() and self.capture_endtag(tag):
            return
        method()


try:
    from xml.parsers.xmllib import XMLParser
except ImportError:
    from xmllib import XMLParser


class Parser(DocumentHandler, CaptureMixin, XMLParser):
    """XBEL parser combining the document handler, capture support and
    the XMLParser base class."""

    def __init__(self, filename):
        # Each base has its own constructor; run them in base-class order.
        # Only the document handler takes the file name.
        DocumentHandler.__init__(self, filename)
        for base in (CaptureMixin, XMLParser):
            base.__init__(self)
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.