// Logo Search packages:
// Sourcecode: f-spot version File versions  Download package
//
// XmlParser.cs

using System;
using System.Collections;
using System.IO;
using System.Text;
using System.Xml;

using SemWeb.Util;

namespace SemWeb {
      // Reads an RDF/XML document from an XmlReader and adds one Statement
      // to a StatementSink for each triple it recognizes.  Parse errors are
      // reported by throwing a ParserException (see OnError).
      public class RdfXmlReader : RdfReader {
            // TODO: Make some of the errors warnings.
      
            // The underlying reader; closed at the end of Select.
            XmlReader xml;
            
            // rdf:nodeID label -> the anonymous Entity created for it, so
            // repeated uses of a label resolve to the same node.
            Hashtable blankNodes = new Hashtable();
            
            // URI -> Entity cache, consulted only when ReuseEntities is set.
            UriMap namedNodes = new UriMap();
            
            // URIs already declared through rdf:ID on a description; used
            // as a set (the stored value is irrelevant).
            Hashtable seenIDs = new Hashtable();
            
            // The sink that statements are added to during Select.
            StatementSink storage;
            
            static readonly Entity
                  rdfType = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
                  rdfFirst = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first",
                  rdfRest = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest",
                  rdfNil = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil",
                  rdfSubject = "http://www.w3.org/1999/02/22-rdf-syntax-ns#subject",
                  rdfPredicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate",
                  rdfObject = "http://www.w3.org/1999/02/22-rdf-syntax-ns#object",
                  rdfStatement = "http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement";
            
            // Reads from an in-memory DOM document.
            public RdfXmlReader(XmlDocument document) {
                  xml = new XmlNodeReader(document);
            }
            
            // Reads from an existing XmlReader.  The reader is wrapped in an
            // XmlValidatingReader with validation switched off.
            public RdfXmlReader(XmlReader document) {
                  XmlValidatingReader reader = new XmlValidatingReader(document);
                  reader.ValidationType = ValidationType.None;
                  xml = reader;
            }
            
            // Reads XML text from a TextReader.
            public RdfXmlReader(TextReader document) : this(new XmlTextReader(document)) {
            }

            // Reads XML text from a Stream.
            public RdfXmlReader(Stream document) : this(new XmlTextReader(document)) {
            }

            // Reads from the source named by 'file', opened through GetReader
            // (declared outside this class).
            public RdfXmlReader(string file) : this(GetReader(file)) {
            }
            
            // Parses the document and adds every statement found to 'storage'
            // (after wrapping it with GetDupCheckSink).  The reader is always
            // closed when this method exits, including when a parse error
            // is thrown.
            public override void Select(StatementSink storage) {
                  // Read forward until an rdf:RDF node is found, then
                  // process every element node read from there on as a
                  // description.  The inner loop runs to the end of the
                  // input, so only the first rdf:RDF element is handled.
                  // NOTE: a document that contains no rdf:RDF element
                  // produces no statements; a bare description used as the
                  // document element is not handled here.
                  
                  storage = GetDupCheckSink(storage);
                  this.storage = storage;
                  
                  try {
                        while (xml.Read()) {
                              if (xml.NamespaceURI == NS.RDF && xml.LocalName == "RDF" ) {
                                    while (xml.Read()) {
                                          if (xml.NodeType == XmlNodeType.Element)
                                                ParseDescription();
                                    }
                              }
                        }
                  } finally {
                        // Release the reader even if OnError threw.
                        xml.Close();
                  }
            }
            
            // The expanded name (namespace URI followed by local name) of
            // the node the reader is positioned on.
            private string CurNode() {
                  return xml.NamespaceURI + xml.LocalName;
            }
            
            // 1 if the attribute value is present (non-null), else 0; lets
            // callers count how many of several attributes were given.
            private int isset(string attribute) {
                  return attribute != null ? 1 : 0;
            }
            
            // Resolves 'uri' through GetAbsoluteUri against the reader's
            // current base URI, falling back to BaseUri when the reader
            // reports an empty one.
            private string Unrelativize(string uri) {
                  return GetAbsoluteUri(xml.BaseURI != "" ? xml.BaseURI : BaseUri, uri);
            }
            
            // Returns the anonymous entity for an rdf:nodeID label, creating
            // and remembering it on first use.
            private Entity GetBlankNode(string nodeID) {
                  if (blankNodes.ContainsKey(nodeID))
                        return (Entity)blankNodes[nodeID];
                  
                  Entity entity = new Entity(null);
                  blankNodes[nodeID] = entity;

                  return entity;
            }
            
            // Returns an entity for 'uri'.  When ReuseEntities is set, the
            // same Entity instance is returned for repeated URIs; otherwise
            // a new instance is created on every call.
            private Entity GetNamedNode(string uri) {
                  if (!ReuseEntities)
                        return new Entity(uri);
            
                  Entity ret = (Entity)namedNodes[uri];
                  if (ret != null) return ret;
                  ret = new Entity(uri);
                  namedNodes[uri] = ret;
                  return ret;
            }
            
            // Parses a node element and returns the entity it describes.
            private Entity ParseDescription() {
                  // The XmlReader is positioned on an element node
                  // that is a description of an entity.
                  // On returning, the reader is positioned on the
                  // end element of the description node (or still on
                  // the element itself if it is an empty element), so
                  // the caller's next Read() moves past it.
                  
                  string nodeID = xml.GetAttribute("nodeID", NS.RDF);
                  string about = xml.GetAttribute("about", NS.RDF);
                  //if (about == null)
                  //    about = xml.GetAttribute("about");
                  string ID = xml.GetAttribute("ID", NS.RDF);
                  if (isset(nodeID) + isset(about) + isset(ID) > 1)
                        OnError("An entity description cannot specify more than one of rdf:nodeID, rdf:about, and rdf:ID");
                        
                  Entity entity;
                  
                  if (about != null)
                        entity = GetNamedNode(Unrelativize(about));
                  else if (ID != null) {
                        entity = GetNamedNode(Unrelativize("#" + ID));
                        
                        if (seenIDs.ContainsKey(entity.Uri))
                              OnError("Two descriptions cannot use the same rdf:ID: <" + entity.Uri + ">");
                        seenIDs[entity.Uri] = seenIDs;
                  } else if (nodeID != null)
                        entity = GetBlankNode(nodeID);
                  else
                        entity = new Entity(null);
                  
                  // If the name of the element is not rdf:Description,
                  // then the name gives its type.
                  if (CurNode() != NS.RDF + "Description") {
                        if (CurNode() == NS.RDF + "li") OnError("rdf:li cannot be the type of a node");
                        storage.Add(new Statement(entity, rdfType, (Entity)CurNode(), Meta));
                  }
                  
                  ParsePropertyAttributes(entity);
                  ParsePropertyNodes(entity);
                  
                  return entity;
            }
            
            // Adds a statement about 'entity' for each property attribute on
            // the current element.  Returns true if at least one statement
            // was added.  The reader is left on the element itself.
            private bool ParsePropertyAttributes(Entity entity) {
                  bool foundAttrs = false;
                  
                  if (!xml.MoveToFirstAttribute()) return false;
                  do {
                        // Property attributes in the default namespace
                        // should be ignored.
                        if (xml.NamespaceURI == "")
                              continue;
                  
                        string curnode = CurNode();
                        
                        // rdf:type is interpreted with an entity object,
                        // not a literal object.
                        if (curnode == NS.RDF + "type") {
                              storage.Add(new Statement(entity, rdfType, (Entity)xml.Value, Meta));
                              foundAttrs = true;
                              continue;
                        }
                        
                        // Properties which are not recognized as property
                        // attributes and should be ignored.
                        if (curnode == NS.RDF + "RDF") continue;
                        if (curnode == NS.RDF + "Description") continue;
                        if (curnode == NS.RDF + "ID") continue;
                        if (curnode == NS.RDF + "about") continue;
                        if (curnode == NS.RDF + "parseType") continue;
                        if (curnode == NS.RDF + "resource") continue;
                        if (curnode == NS.RDF + "nodeID") continue;
                        if (curnode == NS.RDF + "datatype") continue;
                        
                        // Properties which are invalid as attributes.
                        if (curnode == NS.RDF + "li")
                              OnError("rdf:li is not a valid attribute");
                        if (curnode == NS.RDF + "aboutEach" || curnode == NS.RDF + "aboutEachPrefix")
                              OnError("rdf:aboutEach has been removed from the RDF spec");
                        
                        // Unrecognized attributes in the xml namespace should be ignored.
                        if (xml.Prefix == "xml") continue;
                        if (xml.Prefix == "xmlns") continue;
                        if (curnode == "http://www.w3.org/2000/xmlns/xmlns") continue;
                        
                        // This is a literal property attribute.
                        string lang = xml.XmlLang != "" ? xml.XmlLang : null;
                        storage.Add(new Statement(entity, curnode,
                              new Literal(xml.Value, lang, null), Meta));
                        foundAttrs = true;
                              
                  } while (xml.MoveToNextAttribute());
                  
                  // Step back from the last attribute to its owning element.
                  xml.MoveToElement();
                  
                  return foundAttrs;
            }
            
            // Parses every property element that is a child of the current
            // element, adding statements with 'subject' as the subject.
            private void ParsePropertyNodes(Entity subject) {
                  // The reader is positioned on the start tag of a
                  // description node.  On returning, the reader is
                  // positioned on the end element of that node (or is
                  // left untouched if the element is empty).
                  
                  if (xml.IsEmptyElement) return;
                  
                  // Running counter used to expand rdf:li into rdf:_n.
                  int liIndex = 1;
                  
                  // True when ParseProperty has already moved the reader
                  // onto the node following the property; in that case
                  // that node must be examined before reading again,
                  // otherwise it would be skipped.
                  bool onNextNode = false;
                  
                  while (true) {
                        if (onNextNode) {
                              onNextNode = false;
                              if (xml.EOF) break;
                        } else if (!xml.Read()) {
                              break;
                        }
                        
                        if (xml.NodeType == XmlNodeType.EndElement)
                              break;
                        if (xml.NodeType != XmlNodeType.Element)
                              continue;
                        
                        onNextNode = ParseProperty(subject, ref liIndex);
                  }
            }
            
            // Parses one property element and adds the resulting statement
            // (plus a reification of it when rdf:ID is present).
            //
            // Returns true when the reader has already been advanced onto
            // the node that follows the property element (this happens when
            // the content was consumed with ReadInnerXml); returns false
            // when the reader is still on the property's end element or on
            // the empty property element, so the caller must Read() to
            // move on.
            private bool ParseProperty(Entity subject, ref int liIndex) {
                  // The reader is positioned on a property node.
                  
                  // Get all of the attributes before we move the reader forward.
                  
                  string nodeID = xml.GetAttribute("nodeID", NS.RDF);
                  string resource = xml.GetAttribute("resource", NS.RDF);
                  
                  string parseType = xml.GetAttribute("parseType", NS.RDF);
                  string datatype = xml.GetAttribute("datatype", NS.RDF);
                  
                  string lang = xml.XmlLang != "" ? xml.XmlLang : null;

                  string predicate = CurNode();
                  if (predicate == NS.RDF + "li")
                        predicate = NS.RDF + "_" + (liIndex++);
                        
                  string ID = xml.GetAttribute("ID", NS.RDF);
                  
                  bool readerPastNode = false;
                  
                  Resource objct = null;
                  if (nodeID != null || resource != null) {
                        if (isset(nodeID) + isset(resource) > 1)
                              OnError("A predicate node cannot specify more than one of rdf:nodeID and rdf:resource");
                              
                        if (parseType != null || datatype != null)
                              OnError("The attributes rdf:parseType and rdf:datatype are not valid on a predicate with a rdf:nodeID or rdf:resource attribute");
                              
                        // Object is an entity given by nodeID or resource.
                        if (nodeID != null)
                              objct = GetBlankNode(nodeID);
                        else if (resource != null)
                              objct = GetNamedNode(Unrelativize(resource));
                              
                        // Remaining property attributes describe the object.
                        ParsePropertyAttributes((Entity)objct);
                        
                        // No children are allowed in this element.
                        if (!xml.IsEmptyElement)
                        while (xml.Read()) {
                              if (xml.NodeType == XmlNodeType.EndElement) break;
                              if (xml.NodeType == XmlNodeType.Whitespace) continue;
                              if (xml.NodeType == XmlNodeType.Comment) continue;
                              if (xml.NodeType == XmlNodeType.ProcessingInstruction) continue;
                              OnError("Content is not allowed within a property with a rdf:nodeID or rdf:resource attribute");
                        }
                  
                  } else if (parseType != null && parseType == "Literal") {
                        if (datatype == null)
                              datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
                        
                        if (ParsePropertyAttributes(new Entity(null)))
                              OnError("Property attributes are not valid when parseType is Literal");
                        
                        // ReadInnerXml moves the reader past the end tag,
                        // onto the following node.
                        objct = new Literal(xml.ReadInnerXml(), null, datatype);
                        readerPastNode = true;
                        
                  } else if (parseType != null && parseType == "Resource") {
                        // The property element doubles as the description
                        // of a new anonymous node.
                        objct = new Entity(null);
                        
                        ParsePropertyAttributes((Entity)objct);
                        if (!xml.IsEmptyElement)
                              ParsePropertyNodes((Entity)objct);
                        
                  } else if (parseType != null && parseType == "Collection") {
                        // Build an rdf:first / rdf:rest linked list whose
                        // head is 'collection'.
                        Entity collection = new Entity(null);
                        Entity lastnode = collection;
                        bool empty = true;
                        
                        ParsePropertyAttributes(collection);
                        
                        if (!xml.IsEmptyElement)
                        while (xml.Read()) {
                              if (xml.NodeType == XmlNodeType.EndElement) break;
                              if (xml.NodeType != XmlNodeType.Element) continue;
                              
                              if (!empty) {
                                    Entity next = new Entity(null);
                                    storage.Add(new Statement(lastnode, rdfRest, next, Meta));
                                    lastnode = next;
                              }
                              
                              Entity item = ParseDescription();
                              storage.Add(new Statement(lastnode, rdfFirst, item, Meta));
                              
                              empty = false;
                        }

                        if (empty) {
                              // An empty collection is rdf:nil itself; no
                              // list cell exists, so no rdf:rest terminator
                              // is emitted for the unused head node.
                              objct = rdfNil;
                        } else {
                              storage.Add(new Statement(lastnode, rdfRest, rdfNil, Meta));
                              objct = collection;
                        }
                        
                  } else if (datatype != null) {
                        // Forces even xml content to be read as in parseType=Literal?
                        // Note that any xml:lang is discarded.
                        
                        if (ParsePropertyAttributes(new Entity(null)))
                              OnError("Property attributes are not valid when a data type is given");
                              
                        // As above, the reader ends up on the node after
                        // the property's end tag.
                        objct = new Literal(xml.ReadInnerXml(), null, datatype);
                        readerPastNode = true;
                  
                  } else {
                        // We don't know whether the contents of this element
                        // refer to a literal or an entity.  If an element is
                        // a child of this node, then it must be an entity.
                        // If the property has predicate attributes, then it
                        // is an anonymous entity.  Otherwise the text content
                        // is the literal value.
                        
                        objct = new Entity(null);
                        if (ParsePropertyAttributes((Entity)objct)) {
                              // Found property attributes.  There should be no other internal content?
                              
                              if (!xml.IsEmptyElement)
                              while (xml.Read()) {
                                    if (xml.NodeType == XmlNodeType.EndElement) break;
                                    if (xml.NodeType == XmlNodeType.Whitespace) continue;
                                    if (xml.NodeType == XmlNodeType.Comment) continue;
                                    if (xml.NodeType == XmlNodeType.ProcessingInstruction) continue;
                                    OnError(xml.NodeType + " is not allowed within a property with property attributes");
                              }
                              
                        } else {
                              StringBuilder textcontent = new StringBuilder();
                              bool hadText = false;
                              bool hadElement = false;
                              
                              if (!xml.IsEmptyElement)
                              while (xml.Read()) {
                                    if (xml.NodeType == XmlNodeType.EndElement) break;
                                    
                                    // Comments and processing instructions
                                    // are not part of the literal's value.
                                    if (xml.NodeType == XmlNodeType.Comment) continue;
                                    if (xml.NodeType == XmlNodeType.ProcessingInstruction) continue;
                                    
                                    if (xml.NodeType == XmlNodeType.Element) {
                                          if (hadText)
                                                OnError("Both text and elements are present as a property value");
                                          hadElement = true;
                                          
                                          objct = ParseDescription();
                                    } else if (xml.NodeType == XmlNodeType.Text || xml.NodeType == XmlNodeType.SignificantWhitespace) {
                                          if (hadElement)
                                                OnError("Both text and elements are present as a property value");
                                          textcontent.Append(xml.Value);
                                          hadText = true;
                                    } else {
                                          // Whitespace, CDATA, etc.: kept as
                                          // part of a literal value, and
                                          // discarded if an element is found.
                                          textcontent.Append(xml.Value);
                                    }
                              }
                              
                              if (!hadElement)
                                    objct = new Literal(textcontent.ToString(), lang, null);
                        }
                  }
                        
                  storage.Add(new Statement(subject, predicate, objct, Meta));
                  
                  if (ID != null) {
                        // In addition to adding the statement as normal, also
                        // add a reified statement.
                        Entity statement = GetNamedNode(Unrelativize("#" + ID));
                        storage.Add(new Statement(statement, rdfType, rdfStatement, Meta));
                        storage.Add(new Statement(statement, rdfSubject, subject, Meta));
                        storage.Add(new Statement(statement, rdfPredicate, (Entity)predicate, Meta));
                        storage.Add(new Statement(statement, rdfObject, objct, Meta));
                  }
                  
                  return readerPastNode;
            }
            
            // Aborts parsing by throwing a ParserException carrying 'message',
            // with the line and column appended when the reader can supply
            // them.
            private void OnError(string message) {
                  if (xml is IXmlLineInfo && ((IXmlLineInfo)xml).HasLineInfo()) {
                        IXmlLineInfo line = (IXmlLineInfo)xml;
                        message += ", line " + line.LineNumber + " col " + line.LinePosition;
                  }
                  throw new ParserException(message);
            }
      }
}


// Generated by  Doxygen 1.6.0   Back to index