You can use StAX. The StAX parser streams like SAX, but it maintains a cursor and lets you extract the content at the cursor by using hasNext() and next().
The following code is adapted from this Java example. Note that this is my first attempt ever with Jython, so don't hang me if I did something unconventional, but the example works.
http://www.javacodegeeks.com/2013/05/parsing-xml-using-dom-sax-and-stax-parser-in-java.html
from javax.xml.stream import XMLStreamConstants, XMLInputFactory, XMLStreamReader
from java.io import ByteArrayInputStream;
from java.lang import String
# Sample input document: an <employees> root holding three <employee>
# elements, each with an "id" attribute and <firstName>, <lastName> and
# <location> children. It is wrapped in java.lang.String (not left as a
# Python str) because the parsing code below calls its getBytes() method.
xml = String(
"""<?xml version="1.0" encoding="ISO-8859-1"?>
<employees>
<employee id="111">
<firstName>Rakesh</firstName>
<lastName>Mishra</lastName>
<location>Bangalore</location>
</employee>
<employee id="112">
<firstName>John</firstName>
<lastName>Davis</lastName>
<location>Chennai</location>
</employee>
<employee id="113">
<firstName>Rajesh</firstName>
<lastName>Sharma</lastName>
<location>Pune</location>
</employee>
</employees>
""")
class Employee:
    """Plain record for one <employee> element of the sample document.

    Every field defaults to None; the parsing loop assigns them as the
    corresponding attribute/elements are encountered.
    """

    id = None          # value of the element's "id" attribute
    firstName = None   # text of the <firstName> child
    lastName = None    # text of the <lastName> child
    location = None    # text of the <location> child

    def __str__(self):
        """Return the record as "<firstName> <lastName>(<id>) <location>".

        %-formatting is used rather than ``+`` concatenation so that a field
        which is still None (or is not a string) is rendered instead of
        raising TypeError.
        """
        return "%s %s(%s) %s" % (
            self.firstName, self.lastName, self.id, self.location)
# Stream the sample document with StAX and collect one Employee per
# <employee> element, then print them.
factory = XMLInputFactory.newInstance()
# Ask the parser to coalesce adjacent character data so that an element's
# text arrives as a single CHARACTERS event; without this, only the last
# fragment of a split text node would survive in tagContent.
factory.setProperty(XMLInputFactory.IS_COALESCING, True)
# Encode with the charset named in the XML declaration instead of the
# platform default, so the bytes match what the declaration promises.
reader = factory.createXMLStreamReader(
    ByteArrayInputStream(xml.getBytes("ISO-8859-1")))

employees = []
employee = None     # Employee currently being filled in, if any
tagContent = None   # text delivered by the most recent CHARACTERS event

try:
    while reader.hasNext():
        event = reader.next()
        if event == XMLStreamConstants.START_ELEMENT:
            if reader.getLocalName() == "employee":
                employee = Employee()
                # Look the attribute up by name rather than by position so
                # additional attributes cannot change which value is read.
                employee.id = reader.getAttributeValue(None, "id")
        elif event == XMLStreamConstants.CHARACTERS:
            tagContent = reader.getText()
        elif event == XMLStreamConstants.END_ELEMENT:
            # At a closing tag, tagContent holds the text seen just before it.
            name = reader.getLocalName()
            if name == "employee":
                employees.append(employee)
            elif name == "firstName":
                employee.firstName = tagContent
            elif name == "lastName":
                employee.lastName = tagContent
            elif name == "location":
                employee.location = tagContent
finally:
    # Release parser resources even if parsing fails part-way through.
    reader.close()

for employee in employees:
    # Single-argument call form behaves the same as the print statement on
    # Jython 2 and also parses on Python 3.
    print(employee)
parser.feed("<doc>"); do_something_other(); parser.feed("</doc>")
– Essentiality