Introducing PyXML

python setup.py install

from xml.dom.ext.reader import Sax2

#Sample XML document used by the parsing example below:

#a <verse> root element holding one <attribution> and four <line> children

DOC = """<?xml version="1.0" encoding="UTF-8"?>

<verse>

  <attribution>Christopher Okibgo</attribution>

  <line>For he was a shrub among the poplars,</line>

  <line>Needing more roots</line>

  <line>More sap to grow to sunlight,</line>

  <line>Thirsting for sunlight</line>

</verse>

"""

#Parse the XML string held in DOC and pull out the attribution text

#Create an XML reader object

reader = Sax2.Reader()

#Create a 4DOM document node parsed from XML in a string

doc_node = reader.fromString(DOC)

#You can execute regular DOM operations on the document node

verse_element = doc_node.documentElement

#And you can even use "Pythonic" shortcuts for things like

#Node lists and named node maps

#The first child of the verse element is a white space text node

#(the line break and indentation that follow the <verse> start tag)

#The second is the attribution element, hence index 1 rather than 0

attribution_element = verse_element.childNodes[1]

#attribution_string becomes "Christopher Okibgo"

#(firstChild is the text node inside <attribution>; data is its string value)

attribution_string = attribution_element.firstChild.data

from xml.dom.ext.reader import HtmlLib

#Parse an HTML page into a DOM tree and read its title

#Create an HTML reader object

reader = HtmlLib.Reader()

#Create a 4DOM document node parsed from HTML at a URL

#NOTE(review): this reads a live URL, so it needs network access and

#the result depends on the page's content at the time it is run

doc_node = reader.fromUri("http://www.python.org")

#Get the title of the HTML document

#(the first element returned for the tag name "TITLE")

title_elem = doc_node.documentElement.getElementsByTagName("TITLE")[0]

#title_string becomes "Python Language Website"

#NOTE(review): presumably the page title when this was written - confirm

title_string = title_elem.firstChild.data

from xml.dom.ext import StripXml, Print

#Clean up and serialize a DOM node that was parsed earlier (doc_node)

#Strip the white space nodes in place and return the same node

#(the return value is not needed here, so it is discarded)

StripXml(doc_node)

#Print the node as serialized XML to stdout

Print(doc_node)

#Write the node as serialized XML to a file

#The write is wrapped in try/finally so that the file is closed

#even if serialization raises an exception part-way through

f = open("tmp.xml", "w")

try:

    Print(doc_node, stream=f)

finally:

    f.close()

from xml.dom import implementation

from xml.dom import EMPTY_NAMESPACE, XML_NAMESPACE

from xml.dom.ext import Print

#Build a small XML document from scratch using the DOM implementation

#object instead of parsing existing text

#Create a document type node using the doctype name "message"

#A blank system ID and blank public ID (i.e. no DTD information)

doctype = implementation.createDocumentType("message", None, None)

#Create a document node, which also creates a document element node

#For the element, use a blank namespace URI and local name "message"

doc = implementation.createDocument(EMPTY_NAMESPACE, "message", doctype)

#Get the document element

msg_elem = doc.documentElement

#Create an xml:lang attribute on the new element

#The namespace-aware call takes the namespace URI, the qualified name

#(prefix included) and the attribute value, in that order

msg_elem.setAttributeNS(XML_NAMESPACE, "xml:lang", "en")

#Create a text node with some data in it

#(created through the document, but not yet attached to any element)

new_text = doc.createTextNode("You need Python")

#Add the new text node to the document element

msg_elem.appendChild(new_text)

#Print out the result

#(no stream argument is given, so the output goes to stdout)

Print(doc)

from xml.ns import XSLT

#Take a well-known namespace URI from xml.ns rather than typing it by hand

#The XSLT namespace http://www.w3.org/1999/XSL/Transform

NS = XSLT.BASE

from xml.xpath import Context, Evaluate

#Run an XPath query against a DOM node using a namespace-aware context

#Create an XPath context with the given DOM node

#With no other nodes in the context list

#(list size 1, current position 1)

#And the given prefix/namespace mapping

#(the "xsl" prefix used in the expression below is bound to NS here)

con = Context.Context(doc, 1, 1, processorNss={"xsl": NS})

#Evaluate the XPath expression and return the resulting

#Python list of nodes

#The expression selects every element in the XSLT namespace

#NOTE(review): doc appears to be the "message" document built earlier,

#which has no XSLT elements, so result is presumably empty - confirm

result = Evaluate("//xsl:*", context=con)

from xml.marshal.generic import Marshaller

#Serialize an ordinary Python object to XML text

#Create a generic marshaller

#NOTE(review): the name "marshal" would shadow the standard library's

#marshal module if that were imported in the same scope

marshal = Marshaller()

#A sample object: a dictionary with an integer key mapped to a list

#and a string key mapped to a string

obj = {1: [2, 3], 'a': 'b'}

#Dump to a string

#(the XML shown after this listing is the kind of output produced)

xml_form = marshal.dumps(obj)

<?xml version="1.0"?>

<marshal>

  <dictionary id="i2">

    <int>1</int>

    <list id="i3"><int>2</int><int>3</int></list>

    <string>a</string>

    <string>b</string>

  </dictionary>

</marshal>

Introducing PyXML

Happenings...

Setting up PyXML

SAX

DOM

Canonicalization

XPath

Et cetera

Wrap up