roget.py :  » Network » NetworkX » networkx-1.1 » examples » graph » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » NetworkX 
NetworkX » networkx 1.1 » examples » graph » roget.py
#!/usr/bin/env python
"""
Build a directed graph of 1022 categories and
5075 cross-references as defined in the 1879 version of Roget's Thesaurus
contained in the datafile roget_dat.txt. This example is described in
Section 1.2 in Knuth's book [1,2].

Note that one of the 5075 cross references is a self loop yet
it is included in the graph built here because
the standard networkx DiGraph class allows self loops.
(cf. 400pungency:400 401 403 405).

References
----------

[1] Donald E. Knuth,
    "The Stanford GraphBase: A Platform for Combinatorial Computing",
    ACM Press, New York, 1993.
[2] http://www-cs-faculty.stanford.edu/~knuth/sgb.html


"""
__author__ = """Brendt Wohlberg\nAric Hagberg (hagberg@lanl.gov)"""
__date__ = "$Date: 2005-04-01 07:56:22 -0700 (Fri, 01 Apr 2005) $"
__credits__ = """"""
__revision__ = ""
#    Copyright (C) 2004 by 
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    All rights reserved.
#    BSD license.

from networkx import *
import re
import sys

def roget_graph():
    """Return the thesaurus graph from the roget.dat example in
    the Stanford Graph Base.

    The data is read from ``roget_dat.txt.gz`` in the current working
    directory, falling back to the uncompressed ``roget_dat.txt``.

    Returns
    -------
    G : DiGraph
        One node per category, keyed by the category number kept as a
        string, and one edge ``head -> tail`` for every cross-reference
        listed after the colon.  Self loops are kept.

    Raises
    ------
    IOError
        If neither data file can be opened.
    """
    # Prefer the compressed data file; fall back to the plain-text one.
    # NOTE(review): on Python 3, gzip.open(..., 'r') yields bytes, so the
    # string tests below would need the lines decoded first -- confirm
    # before running this under Python 3.
    try:
        import gzip
        fh = gzip.open('roget_dat.txt.gz', 'r')
    except (ImportError, IOError):
        try:
            fh = open("roget_dat.txt", "r")
        except IOError:
            # String exceptions are not valid; raise a real exception type.
            raise IOError("File roget_dat.txt not found.")

    G = DiGraph()

    # Compiled once: the leading digits of a head name are its number.
    numfind = re.compile(r"^\d+")
    # Text buffered from a line whose continuation has not been read yet.
    oldline = ""

    try:
        for line in fh:
            if line.startswith("*"):  # skip comments
                continue
            if line.startswith(" "):  # continuation line, append to buffer
                line = oldline + line
            if line.endswith("\\\n"):  # more to come: buffer, go to next
                oldline = line.rstrip("\\\n")
                continue
            # The logical line is complete, so drop the buffered text to
            # keep it from leaking into a later, unrelated line.
            oldline = ""

            (headname, tails) = line.split(":")

            head = numfind.findall(headname)[0]  # get the number
            G.add_node(head)

            for tail in tails.split():
                if head == tail:
                    # The self loop is reported but deliberately kept:
                    # DiGraph allows self loops.
                    sys.stderr.write(
                        "including self loop %s %s\n" % (head, tail))
                G.add_edge(head, tail)
    finally:
        # Always release the file handle, even if parsing fails.
        fh.close()

    return G

if __name__ == '__main__':
    # Script entry point: build the thesaurus graph and report its size
    # and the number of connected components of its undirected version.
    # Each print() takes a single pre-formatted string, so the output is
    # the same whether print is a statement or a function.
    from networkx import *
    G = roget_graph()
    print("Loaded roget_dat.txt containing 1022 categories.")
    size_report = "digraph has %d nodes with %d edges" % (
        number_of_nodes(G), number_of_edges(G))
    print(size_report)
    UG = G.to_undirected()
    component_total = number_connected_components(UG)
    print("%s connected components" % (component_total,))

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.