priomsrb's answer is great and works. For my use case I need to integrate it into an existing framework where, e.g., the encoding is also handled. Therefore I refactored the code so that it has a separate LineNumberHandler class.
With this change the code also works with a SAX InputSource, whose encoding can be set like this:
// Wrap the incoming byte stream in a SAX InputSource so that an explicit
// character encoding can be attached to it before parsing.
org.xml.sax.InputSource is = new org.xml.sax.InputSource();
is.setByteStream(instream);

// Only override the encoding when one was actually supplied.
if (encoding != null) {
  is.setEncoding(encoding);
  if (Debug.CORE) {
    Debug.log("setting XML encoding to - " + is.getEncoding());
  }
}
Separate LineNumberHandler
/**
 * SAX handler that builds a DOM tree inside a given {@link Document} and
 * attaches the line number reported by the parser's {@link Locator} to every
 * element it creates.
 *
 * <p>The line number is stored as a {@code String} in the element's user data
 * under the key {@code LINE_NUMBER_KEY_NAME}. If the parser never supplies a
 * locator, elements are still created but carry no line number user data.
 *
 * @author wf
 */
public static class LineNumberHandler extends DefaultHandler {
  /** Elements that have been opened but not yet closed; the top is the current element. */
  final Stack<Element> elementStack = new Stack<Element>();
  /** Character data collected since the last start/end element event. */
  final StringBuilder textBuffer = new StringBuilder();
  /** Locator handed over by the parser; stays {@code null} if the parser supplies none. */
  private Locator locator;
  /** Document that is used as node factory and receives the root element. */
  private final Document doc;

  /**
   * Creates a line number handler for the given document.
   *
   * @param doc the document used to create nodes and to which the root
   *            element is appended
   */
  public LineNumberHandler(Document doc) {
    this.doc = doc;
  }

  /**
   * Remembers the locator so that it can be queried for the current line
   * number while elements are being created.
   *
   * @param locator the locator supplied by the parser
   */
  @Override
  public void setDocumentLocator(final Locator locator) {
    this.locator = locator;
  }

  /**
   * Creates an element named {@code qName}, copies all attributes onto it,
   * tags it with the current line number and pushes it onto the element stack.
   *
   * @param uri        namespace URI (not used)
   * @param localName  local name (not used)
   * @param qName      qualified name used as the element's tag name
   * @param attributes attributes copied onto the element by qualified name
   * @throws SAXException declared by the overridden method
   */
  @Override
  public void startElement(final String uri, final String localName,
      final String qName, final Attributes attributes) throws SAXException {
    addTextIfNeeded();
    final Element el = doc.createElement(qName);
    for (int i = 0; i < attributes.getLength(); i++) {
      el.setAttribute(attributes.getQName(i), attributes.getValue(i));
    }
    // A SAX parser is not obliged to provide a locator; without one there is
    // no line information, so leave the element untagged instead of failing.
    if (this.locator != null) {
      el.setUserData(LINE_NUMBER_KEY_NAME,
          String.valueOf(this.locator.getLineNumber()), null);
    }
    elementStack.push(el);
  }

  /**
   * Flushes pending text into the element being closed, pops it from the
   * stack and appends it to its parent element, or to the document if it is
   * the root element.
   *
   * @param uri       namespace URI (not used)
   * @param localName local name (not used)
   * @param qName     qualified name (not used)
   */
  @Override
  public void endElement(final String uri, final String localName,
      final String qName) {
    addTextIfNeeded();
    final Element closedEl = elementStack.pop();
    if (elementStack.isEmpty()) {
      // Nothing left on the stack: the closed element is the root.
      doc.appendChild(closedEl);
    } else {
      elementStack.peek().appendChild(closedEl);
    }
  }

  /**
   * Buffers character data; it is turned into a text node on the next
   * start or end element event.
   *
   * @param ch     character array containing the data
   * @param start  index of the first character to use
   * @param length number of characters to use
   * @throws SAXException declared by the overridden method
   */
  @Override
  public void characters(final char ch[], final int start, final int length)
      throws SAXException {
    textBuffer.append(ch, start, length);
  }

  /**
   * Appends the buffered text, if any, as a text node to the element on top
   * of the stack and clears the buffer.
   */
  private void addTextIfNeeded() {
    if (textBuffer.length() > 0) {
      final Element el = elementStack.peek();
      final Node textNode = doc.createTextNode(textBuffer.toString());
      el.appendChild(textNode);
      textBuffer.setLength(0);
    }
  }
}
PositionalXMLReader
/**
 * Reads XML into a DOM {@link Document} whose elements carry the line number
 * at which they were encountered, stored as user data under
 * {@link #LINE_NUMBER_KEY_NAME}.
 */
public class PositionalXMLReader {
  /** User data key under which an element's line number is stored. */
  final static String LINE_NUMBER_KEY_NAME = "lineNumber";

  /**
   * Reads a document from the given input stream.
   *
   * @param is
   *          - the input stream
   * @return - the Document
   * @throws IOException
   *           if reading from the stream fails
   * @throws SAXException
   *           if the parser cannot be set up or the content cannot be parsed
   */
  public static Document readXML(final InputStream is)
      throws IOException, SAXException {
    final SAXParser parser = newSaxParser();
    final Document doc = newEmptyDocument();
    parser.parse(is, new LineNumberHandler(doc));
    return doc;
  }

  /**
   * Reads a document from the given SAX input source. Use this variant when
   * settings carried by the input source, such as an explicit encoding, must
   * be honoured.
   *
   * @param source
   *          - the SAX input source
   * @return - the Document
   * @throws IOException
   *           if reading from the source fails
   * @throws SAXException
   *           if the parser cannot be set up or the content cannot be parsed
   */
  public static Document readXML(final org.xml.sax.InputSource source)
      throws IOException, SAXException {
    final SAXParser parser = newSaxParser();
    final Document doc = newEmptyDocument();
    parser.parse(source, new LineNumberHandler(doc));
    return doc;
  }

  /** Creates a SAX parser from a default-configured factory. */
  private static SAXParser newSaxParser() throws SAXException {
    try {
      // NOTE(review): the factory is used with its default settings; if the
      // XML may come from untrusted sources, consider disabling DTDs and
      // external entities here.
      return SAXParserFactory.newInstance().newSAXParser();
    } catch (final ParserConfigurationException e) {
      throw new RuntimeException("Can't create SAX parser / DOM builder.", e);
    }
  }

  /** Creates the empty DOM document that the handler fills while parsing. */
  private static Document newEmptyDocument() {
    try {
      return DocumentBuilderFactory.newInstance().newDocumentBuilder()
          .newDocument();
    } catch (final ParserConfigurationException e) {
      throw new RuntimeException("Can't create SAX parser / DOM builder.", e);
    }
  }
}
JUnit Testcase
package com.bitplan.common.impl;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import com.bitplan.bobase.PositionalXMLReader;
/**
 * Checks that PositionalXMLReader attaches line numbers to the elements of
 * the document it reads.
 */
public class TestXMLWithLineNumbers {
  /**
   * Gets an example XML stream: a five-line document in which the
   * {@code moo} element is on line 3.
   *
   * @return the example stream
   */
  public InputStream getExampleXMLStream() {
    String xmlString = "<foo>\n" + " <bar>\n"
        + " <moo>Hello World!</moo>\n" + " </bar>\n" + "</foo>";
    // Encode explicitly instead of relying on the platform default charset:
    // the document has no XML declaration, so the bytes must be UTF-8.
    InputStream is = new ByteArrayInputStream(
        xmlString.getBytes(java.nio.charset.Charset.forName("UTF-8")));
    return is;
  }

  /**
   * Reads the example document and expects the {@code moo} element to carry
   * the line number "3" as user data.
   *
   * @throws Exception if reading or parsing the example document fails
   */
  @Test
  public void testXMLWithLineNumbers() throws Exception {
    InputStream is = this.getExampleXMLStream();
    Document doc;
    try {
      doc = PositionalXMLReader.readXML(is);
    } finally {
      // Release the stream even when parsing throws.
      is.close();
    }
    Node node = doc.getElementsByTagName("moo").item(0);
    assertEquals("3", node.getUserData("lineNumber"));
  }
}
setProperty
as instructed by the javadoc. – Priapus