#!/usr/bin/env python # -*- coding: utf8 -*- # # SpecGen v5, ontology specification generator tool # # # Previous versions of SpecGen: # v1,2,3 by Christopher Schmidt # v4 by Uldis Bojars # # This software is licensed under the terms of the MIT License. # # Copyright (c) 2007 Sergio Fernández # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
import os
import sys
import time
import re
import urllib

import RDF

# NOTE(review): several string literals in this file had fewer "%s"
# placeholders than format arguments (their markup appears to have been
# lost).  Those literals now carry a minimal <a ...> element so that every
# argument is consumed; all other visible characters were kept.  The exact
# markup -- including the "term_" anchor prefix -- is a reconstruction and
# must be confirmed against the HTML template in use.

# Module-wide state shared by the functions below.
# classranges / classdomains map a class URI (str) to the list of property
# URIs that name that class as their rdfs:range / rdfs:domain.
classranges = {}
classdomains = {}
spec_url = None   # ontology namespace URI, set by specgen()
spec_ns = None    # RDF.NS(spec_url), set by specgen()
spec_pre = None   # CURIE prefix, set by the command-line entry point

# Namespace URI -> prefix table used by niceName().
ns_list = {
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#': "rdf",
    'http://www.w3.org/2000/01/rdf-schema#': "rdfs",
    'http://www.w3.org/2002/07/owl#': "owl",
    'http://www.w3.org/2001/XMLSchema#': 'xsd',
    'http://rdfs.org/sioc/ns#': "sioc",
    'http://xmlns.com/foaf/0.1/': "foaf",
    'http://purl.org/dc/elements/1.1/': "dc",
    'http://purl.org/dc/terms/': "dcterms",
    'http://usefulinc.com/ns/doap#': 'doap',
    'http://www.w3.org/2003/06/sw-vocab-status/ns#': "status",
    'http://purl.org/rss/1.0/modules/content/': "content",
    'http://www.w3.org/2003/01/geo/wgs84_pos#': "geo",
    'http://www.w3.org/2004/02/skos/core#': "skos",
    'http://scot-project.org/scot/ns#': "scot",
}

rdf = RDF.NS('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
rdfs = RDF.NS('http://www.w3.org/2000/01/rdf-schema#')
owl = RDF.NS('http://www.w3.org/2002/07/owl#')
vs = RDF.NS('http://www.w3.org/2003/06/sw-vocab-status/ns#')

termdir = './doc'  # TODO

# Splits a URI into "everything up to the last / or #" and the local name.
_NICE_NAME_RE = re.compile("^(.*[/#])([^/#]+)$")

# Anchor used by the A-Z lists: href target, then link text.
_AZ_LINK = ' <a href="#term_%s">%s</a>, '


def niceName(uri):
    """Return ``prefix:local`` for ``uri`` when its namespace is known.

    The URI is split after its last ``/`` or ``#``.  If the leading part is
    a key of ``ns_list`` the mapped prefix is used; otherwise -- including
    when the URI cannot be split at all -- the URI is returned unchanged.
    """
    match = _NICE_NAME_RE.search(uri)
    if match is None:
        # Previously this dereferenced the failed match and crashed.
        return uri
    namespace, local = match.group(1), match.group(2)
    if namespace in ns_list:
        return ns_list[namespace] + ":" + local
    return uri


def setTermDir(directory):
    """Set the directory htmlDocInfo() reads per-term documentation from."""
    global termdir
    termdir = directory


def termlink(string):
    """Rewrite every ``<spec_pre>:<word>`` occurrence in ``string``.

    NOTE(review): the original docstring says this replaces ``foaf:*`` with
    a link to the term in the document, but the replacement text as written
    is ``<spec_pre>:<word>`` again, so the input comes back unchanged.  The
    link markup seems to be missing; confirm the intended output.
    """
    # The prefix is escaped so that a prefix containing regex
    # metacharacters is matched literally.
    return re.sub(re.escape(spec_pre) + r":(\w+)", spec_pre + r":\1", string)


def return_name(m, urinode):
    """Return the node's URI as a string with ``spec_url`` removed.

    ``m`` is accepted for signature compatibility and is not used.
    """
    return str(urinode.uri).replace(spec_url, "")


def get_rdfs(m, urinode):
    """Return ``(label, comment)`` for a term.

    ``urinode`` may be a string, in which case it is wrapped in ``RDF.Uri``.
    Each value is the ``'string'`` entry of the first matching rdfs:label /
    rdfs:comment literal, or ``''`` when there is none.
    """
    label = ''
    comment = ''
    if isinstance(urinode, str):
        urinode = RDF.Uri(urinode)
    labels = m.find_statements(RDF.Statement(urinode, rdfs.label, None))
    if labels.current():
        label = labels.current().object.literal_value['string']
    comments = m.find_statements(RDF.Statement(urinode, rdfs.comment, None))
    if comments.current():
        comment = comments.current().object.literal_value['string']
    return label, comment


def get_status(m, urinode):
    """Return the vs:term_status text for a term, or ``''`` if it has none."""
    if isinstance(urinode, str):
        # Same coercion get_rdfs() applies to string input.
        urinode = RDF.Uri(urinode)
    found = m.find_statements(RDF.Statement(urinode, vs.term_status, None))
    if found.current():
        return found.current().object.literal_value['string']
    return ''


def htmlDocInfo( t ):
    """Return the contents of ``<termdir>/<t>.en`` passed through termlink().

    Best effort: any failure (missing file, read error, ...) yields ``""``.
    """
    try:
        with open("%s/%s.en" % (termdir, t), "r") as f:
            doc = f.read()
        return termlink(doc)
    except Exception:
        # The original left a disabled fallback text here:
        # "No detailed documentation for this term."
        return ""


def owlVersionInfo(m):
    """Return the first owl:versionInfo literal in the model, or ``""``."""
    found = m.find_statements(RDF.Statement(None, owl.versionInfo, None))
    if found.current():
        return found.current().object.literal_value['string']
    return ""


def _propertyEndpointsDoc(m, term, predicate, index):
    """Render the objects of ``(term, predicate, *)`` as term links.

    Objects that carry an owl:unionOf collection are expanded through
    parseCollection() and each member is also recorded in ``index`` (class
    URI -> property URIs).  Other blank-node objects are skipped.
    """
    pieces = []
    for st in m.find_statements(RDF.Statement(term, predicate, None)):
        collection = m.find_statements(RDF.Statement(st.object, owl.unionOf, None))
        if collection.current():
            for uri in parseCollection(m, collection):
                pieces.append("\n%s\n" % getTermLink(uri))
                # Stored as str, like specInformation() does, so that the
                # duplicate check inside add() compares like with like.
                add(index, uri, str(term.uri))
        elif not st.object.is_blank():
            pieces.append("\n%s\n" % getTermLink(str(st.object.uri)))
    return "".join(pieces)


def rdfsPropertyInfo(term,m):
    """Generate HTML for properties: sub-property-of, domain, range.

    Side effect: members of owl:unionOf domains/ranges are added to the
    module-level ``classdomains`` / ``classranges`` indexes.
    """
    doc = ""

    # subPropertyOf information
    supers = m.find_statements(RDF.Statement(term, rdfs.subPropertyOf, None))
    if supers.current():
        rlist = ''
        for st in supers:
            rlist += "\n%s\n" % getTermLink(str(st.object.uri))
        doc += "\nsub-property-of:\n%s" % rlist

    domainsdoc = _propertyEndpointsDoc(m, term, rdfs.domain, classdomains)
    if domainsdoc:
        doc += "\nDomain:\n%s" % domainsdoc

    rangesdoc = _propertyEndpointsDoc(m, term, rdfs.range, classranges)
    if rangesdoc:
        doc += "\nRange:\n%s" % rangesdoc

    return doc


def parseCollection(model, collection):
    """Return the URIs (as str) reachable from an owl:unionOf statement.

    ``collection`` is a statement stream whose current statement's object is
    the head of the list.  The shape being handled is:

        #propertyA a rdf:Property ;
            rdfs:domain [
                a owl:Class ;
                owl:unionOf [
                    rdf:parseType Collection ;
                    #Foo a owl:Class ;
                    #Bar a owl:Class
                ]
            ]

    seeAlso "Collections in RDF"
    """
    uris = []
    rdflist = model.find_statements(RDF.Statement(collection.current().object, None, None))
    while rdflist and rdflist.current() and not rdflist.current().object.is_blank():
        one = rdflist.current()
        if not one.object.is_blank():
            uris.append(str(one.object.uri))
        rdflist.next()
        one = rdflist.current()
        if not one:
            # Stream exhausted: nothing left to follow.  (The loop
            # condition would stop here too; this avoids dereferencing
            # an empty current statement.)
            break
        if one.predicate == rdf.rest:
            # Continue with the statements about the rest of the list.
            rdflist = model.find_statements(RDF.Statement(one.object, None, None))
    return uris


def getTermLink(uri):
    """Return an HTML link for ``uri`` labelled with niceName(uri).

    URIs inside ``spec_url`` link to the in-page anchor of the term; all
    others link to the URI itself.
    """
    uri = str(uri)
    # NOTE(review): both literals had one "%s" for two arguments; the
    # <a href> wrapper is reconstructed (see the note at the top).
    if uri.startswith(spec_url):
        return '<a href="#term_%s">%s</a>' % (uri.replace(spec_url, ""), niceName(uri))
    return '<a href="%s">%s</a>' % (uri, niceName(uri))


def rdfsClassInfo(term,m):
    """Generate rdfs-type information for Classes: ranges, and domains.

    Reads the module-level ``classdomains`` / ``classranges`` indexes, so
    they must have been filled before this is called.
    """
    doc = ""

    # Callers may pass a plain URI string instead of a node (FIXME upstream).
    try:
        term.uri
    except Exception:
        term = RDF.Node(RDF.Uri(term))

    # subClassOf information
    supers = m.find_statements(RDF.Statement(term, rdfs.subClassOf, None))
    if supers.current():
        doc += "\nsub-class-of:\n"
        rlist = ''
        for st in supers:
            if not st.object.is_blank():
                rlist += "%s " % getTermLink(str(st.object.uri))
        doc += "\n%s\n" % rlist

    # Properties which have rdfs:domain / rdfs:range of this class.
    key = str(term.uri)
    for heading, index in (("\nin-domain-of:\n", classdomains),
                           ("\nin-range-of:\n", classranges)):
        props = index.get(key, "")
        if props:
            doc += heading + "".join(["\n%s\n" % getTermLink(k) for k in props])

    return doc


def rdfsInstanceInfo(term,m):
    """Generate rdfs-type information for instances.

    ``term`` is a URI string; every rdf:type object of it is listed.
    """
    doc = ""
    types = m.find_statements(RDF.Statement(RDF.Node(RDF.Uri(term)), rdf.type, None))
    if types.current():
        doc += "\nRDF Type:\n"
    while types.current():
        doc += "\n%s\n" % getTermLink(str(types.current().object.uri))
        types.next()
    return doc


def owlInfo(term,m):
    """Return extra information defined about a term using OWL.

    Covers owl:inverseOf plus the rdf:type markers DatatypeProperty,
    ObjectProperty, InverseFunctionalProperty and SymmetricProperty.
    """
    res = ''

    # Inverse properties ( owl:inverseOf )
    inverses = m.find_statements(RDF.Statement(term, owl.inverseOf, None))
    if inverses.current():
        res += "\nInverse:\n"
        for st in inverses:
            res += "\n%s\n" % getTermLink(str(st.object.uri))

    # One line per OWL property type the term is declared to have.
    owl_types = (
        (owl.DatatypeProperty, "DatatypeProperty"),
        (owl.ObjectProperty, "ObjectProperty"),
        (owl.InverseFunctionalProperty,
         "InverseFunctionalProperty (uniquely identifying property)"),
        (owl.SymmetricProperty, "SymmetricProperty"),
    )
    for owl_type, label in owl_types:
        hit = m.find_statements(RDF.Statement(term, rdf.type, owl_type))
        if hit.current():
            res += "\nOWL Type:\n%s\n\n" % label

    return res


def docTerms(category, list, m):
    """Return the HTML chunk documenting every term in ``list``.

    ``category`` is 'Property', 'Class' or 'Instance' and selects which
    rdfs*Info() helper is applied; ``list`` holds term names (strings),
    either relative to ``spec_url`` or full URIs.
    """
    doc = ""
    nspre = spec_pre
    for t in list:
        if t.startswith(spec_url) and len(t[len(spec_url):].split("/")) < 2:
            term = t
            # Strip the namespace.  The previous split on the namespace's
            # last character returned '' for "/"-terminated namespaces.
            t = t[len(spec_url):]
            curie = "%s:%s" % (nspre, t)
        elif t.startswith("http://"):
            term = t
            curie = getShortName(t)
            t = getAnchor(t)
        else:
            term = spec_ns[t]
            curie = "%s:%s" % (nspre, t)

        # NOTE(review): this literal had two "%s" for three arguments; the
        # empty anchor carrying the term id is reconstructed.
        doc += "\n\n\n\n<a id=\"term_%s\"></a>%s: %s\n\n\n" % (t, category, curie)

        try:
            term_uri = term.uri
        except Exception:
            term_uri = term  # already a plain URI string
        # NOTE(review): one "%s" for two arguments; <a href> reconstructed.
        doc += "\n\nURI: <a href=\"%s\">%s</a>" % (term_uri, term_uri)

        label, comment = get_rdfs(m, term)
        status = get_status(m, term)  # looked up but not rendered
        doc += "\n\n%s - %s\n\n" % (label, comment)
        doc += "\n\n\n"
        doc += owlInfo(term,m)
        if category=='Property':
            doc += rdfsPropertyInfo(term,m)
        if category=='Class':
            doc += rdfsClassInfo(term,m)
        if category=='Instance':
            doc += rdfsInstanceInfo(term,m)
        doc += "\n\n"
        doc += htmlDocInfo(t)
        doc += "\n\n[back to top]\n\n\n\n"
        doc += "\n\n\n\n\n"
    return doc


def getShortName(uri):
    """Return the part of ``uri`` after its last ``#``, else last ``/``."""
    if "#" in uri:
        return uri.split("#")[-1]
    return uri.split("/")[-1]


def getAnchor(uri):
    """Return the in-page anchor name for ``uri``.

    URIs inside ``spec_url`` use their local part with ``/`` replaced by
    ``_``; other URIs use getShortName().
    """
    if uri.startswith(spec_url):
        return uri[len(spec_url):].replace("/","_")
    return getShortName(uri)


def _localName(name):
    """Strip a leading ``spec_url`` from ``name`` if present."""
    if name.startswith(spec_url):
        return name[len(spec_url):]
    return name


def buildazlist(classlist, proplist, instalist=None):
    """Build the A-Z list of terms.

    Args are a list of classes (strings), a list of props (strings) and an
    optional list of instance URIs.  ``classlist`` and ``proplist`` are
    sorted in place.
    """
    # NOTE(review): the per-term literal had two "%s" for three arguments;
    # _AZ_LINK restores an <a href> around the term name.
    parts = ["\n", "\n\nClasses: "]
    classlist.sort()
    for c in classlist:
        c = _localName(c)
        parts.append(_AZ_LINK % (c, c))
    parts.append("\n\n\n")

    parts.append("\n\nProperties: ")
    proplist.sort()
    for p in proplist:
        p = _localName(p)
        parts.append(_AZ_LINK % (p, p))
    parts.append("\n\n\n")

    if instalist is not None:
        parts.append("\n\nInstances: ")
        for i in instalist:
            parts.append(_AZ_LINK % (getAnchor(i), getShortName(i)))
        parts.append("\n\n\n")

    parts.append("\n\n")
    return "".join(parts)


def build_simple_list(classlist, proplist, instalist=None):
    """Build a simple A-Z list of terms.

    Args are a list of classes (strings) and a list of props (strings);
    both are sorted in place.  ``instalist`` is accepted but not rendered
    (FIXME upstream).
    """
    # NOTE(review): this literal had one "%s" for two arguments; the
    # <a href> is reconstructed, every other character is as found.
    item = "\n\n    • <a href=\"#term_%s\">%s</a>\n    • "

    parts = ["\n    ", "\n\n\n    Classes:", "\n\n\n      "]
    classlist.sort()
    for c in classlist:
        parts.append(item % (c.replace(" ", ""), c))
    parts.append("\n\n\n    ")

    parts.append("\n\n\n    Properties:")
    parts.append("\n\n\n      ")
    proplist.sort()
    for p in proplist:
        parts.append(item % (p.replace(" ", ""), p))
    parts.append("\n\n\n    ")

    #FIXME: instances
    parts.append("\n\n    ")
    return "".join(parts)


def add(where, key, value):
    """Append ``value`` to the list at ``where[key]`` unless already there."""
    bucket = where.setdefault(key, [])
    if value not in bucket:
        bucket.append(value)


def specInformation(m):
    """Read the spec (a Redland model) and return ``(classlist, proplist)``.

    Module-level ``classranges`` and ``classdomains`` are filled along the
    way, skipping properties typed owl:DeprecatedProperty.  Class names are
    only listed when their URI starts with ``spec_url``; names are returned
    with ``spec_url`` removed (see return_name()).
    """
    # Class information: ranges, domains, and the list of all names.
    classlist = []
    for onetype in (rdfs.Class, owl.Class):
        for classStatement in m.find_statements(RDF.Statement(None, rdf.type, onetype)):
            subject = classStatement.subject
            if subject.is_blank():
                # Nothing is recorded for anonymous classes.
                continue
            class_uri = str(subject.uri)

            for st in m.find_statements(RDF.Statement(None, rdfs.range, subject)):
                if not m.contains_statement(RDF.Statement(st.subject, rdf.type, owl.DeprecatedProperty)):
                    add(classranges, class_uri, str(st.subject.uri))

            for st in m.find_statements(RDF.Statement(None, rdfs.domain, subject)):
                if not m.contains_statement(RDF.Statement(st.subject, rdf.type, owl.DeprecatedProperty)):
                    add(classdomains, class_uri, str(st.subject.uri))

            name = return_name(m, subject)
            if name not in classlist and class_uri.startswith(spec_url):
                classlist.append(name)

    # Properties in the schema.
    proplist = []
    for onetype in (rdf.Property, owl.ObjectProperty, owl.DatatypeProperty):
        for propertyStatement in m.find_statements(RDF.Statement(None, rdf.type, onetype)):
            name = return_name(m, propertyStatement.subject)
            if name not in proplist:
                proplist.append(name)

    return classlist, proplist


def getInstances(model, classes, properties):
    """Extract all resources instanced in the ontology.

    Returns, in discovery order and without duplicates, the URIs of every
    resource typed with one of ``classes`` plus every resource that is
    rdfs:isDefinedBy ``spec_url`` and is not itself a class or a property.
    """
    # ``classes`` / ``properties`` hold names relative to spec_url, so the
    # candidate has to be compared in that form as well; comparing only the
    # full URI never matched and let classes/properties through.
    known = set(classes)
    known.update(properties)

    instances = []
    seen = set()
    for one in classes:
        for st in model.find_statements(RDF.Statement(None, rdf.type, spec_ns[one])):
            instance = str(st.subject.uri)
            if instance not in seen:
                seen.add(instance)
                instances.append(instance)

    for st in model.find_statements(RDF.Statement(None, rdfs.isDefinedBy, RDF.Uri(spec_url))):
        instance = str(st.subject.uri)
        if instance in seen:
            continue
        if instance in known or instance.replace(spec_url, "") in known:
            continue
        seen.add(instance)
        instances.append(instance)

    return instances


def specgen(specloc, template, instances=False, mode="spec"):
    """The meat and potatoes: everything starts here.

    Parses the ontology at ``specloc``, builds the term list and the
    per-term documentation, and returns ``template`` with its two ``%s``
    slots filled (term index, then term documentation) and ``$VersionInfo$``
    replaced.  ``mode`` is "spec" (buildazlist) or "list"
    (build_simple_list); anything else raises ValueError.
    """
    global spec_url
    global spec_ns
    global ns_list

    if mode not in ("spec", "list"):
        # Previously this only failed much later with an unbound local.
        raise ValueError("unknown mode %r (expected 'spec' or 'list')" % (mode,))

    m = RDF.Model()
    p = RDF.Parser()
    try:
        p.parse_into_model(m, specloc)
    except IOError as e:
        print("Error reading from ontology: " + str(e))
        usage()
    except RDF.RedlandError as e:
        # Reported but not fatal here; processing continues with whatever
        # was loaded into the model.
        print("Error parsing the ontology")

    spec_url = getOntologyNS(m)
    spec_ns = RDF.NS(spec_url)
    ns_list[spec_url] = spec_pre

    classlist, proplist = specInformation(m)
    classlist = sorted(classlist)
    proplist = sorted(proplist)
    instalist = None
    if instances:
        instalist = getInstances(m, classlist, proplist)
        instalist.sort(key=lambda uri: getShortName(uri).lower())

    if mode == "spec":
        # Build HTML list of terms.
        azlist = buildazlist(classlist, proplist, instalist)
    else:
        # Build simple list of terms.
        azlist = build_simple_list(classlist, proplist, instalist)

    # Generate term HTML.  Properties are rendered first on purpose:
    # rdfsPropertyInfo() adds to classdomains/classranges, which
    # rdfsClassInfo() reads while rendering the classes.
    termlist = docTerms('Property', proplist, m)
    termlist = docTerms('Class', classlist, m) + termlist
    if instances:
        termlist += docTerms('Instance', instalist, m)

    # Read the RDF from the original location.  The result is currently
    # not substituted into the template (only two parameters are used).
    u = urllib.urlopen(specloc)
    try:
        rdfdata = u.read()
    finally:
        u.close()
    rdfdata = re.sub(r"(<\?xml version.*\?>)", "", rdfdata)
    # NOTE(review): as written this pattern matches only the empty string,
    # so the substitution changes nothing -- confirm the intended pattern.
    rdfdata = re.sub(r"()", "", rdfdata)

    template = re.sub(r"^#format \w*\n", "", template)
    # Plain replacement: the version text must not be interpreted as a
    # regex replacement template (backslashes, group references).
    template = template.replace("$VersionInfo$", owlVersionInfo(m).encode("utf-8"))

    # NOTE: this assumes every literal "%" in the template is escaped as
    # "%%" and that it contains exactly as many "%s" as parameters below.
    template = template % (azlist, termlist.encode("utf-8"))

    return template


def save(path, text):
    """Write ``text`` to ``path``; on failure print the error and return."""
    try:
        with open(path, "w") as f:
            f.write(text)
    except Exception as e:
        print("Error writting in file \"" + path + "\": " + str(e))


def getOntologyNS(m):
    """Return the ontology namespace, terminated by ``/`` or ``#``.

    Taken from the subject of the first ``rdf:type owl:Ontology``
    statement; ``#`` is appended when the URI ends in neither ``/`` nor
    ``#``.  Exits the program when there is no such non-blank subject.
    """
    ns = None
    found = m.find_statements(RDF.Statement(None, rdf.type, owl.Ontology))
    if found.current():
        subject = found.current().subject
        if not subject.is_blank():
            ns = str(subject.uri)
            if not ns.endswith(("/", "#")):
                ns += "#"

    if ns is None:
        sys.exit("Impossible to get ontology's namespace")
    return ns


def __getScriptPath():
    """Return how the script should be named in the usage text.

    ``sys.argv[0]`` is returned as is, except that when its directory is
    one of the ``PATH`` entries only the file name is returned.
    """
    path = sys.argv[0]
    if path.startswith("./"):
        return path
    base = "/".join(path.split("/")[:-1])
    # PATH may be unset; treat that as "no entries" instead of failing.
    for one in os.environ.get("PATH", "").split(":"):
        if base == one:
            return path.split("/")[-1]
    return path


def usage():
    """Print the command-line help and exit with status -1."""
    script = __getScriptPath()
    text = (
        "Usage: %s ontology prefix template destination [flags] "
        "ontology : path to ontology file "
        "prefix : prefix for CURIEs "
        "template : HTML template path "
        "destination : specification destination (by default) "
        "optional flags: "
        "-i : add instances on the specification (disabled by default) \n"
        "examples: %s example.owl ex template.html example.owl.html -i "
    )
    print(text % (script, script))
    sys.exit(-1)


if __name__ == "__main__":
    # Ontology specification generator tool
    args = sys.argv[1:]
    if len(args) < 4:
        usage()
    else:
        # ontology
        specloc = "file:" + str(args[0])
        spec_pre = args[1]

        # template
        temploc = args[2]
        template = None
        try:
            with open(temploc, "r") as f:
                template = f.read()
        except Exception as e:
            print("Error reading from template \"" + temploc + "\": " + str(e))
            usage()

        # destination
        dest = args[3]

        # flags: everything after the four positional arguments.  (The
        # destination itself used to be scanned as a flag as well.)
        flags = args[4:]
        instances = '-i' in flags

        save(dest, specgen(specloc, template, instances=instances))