I had the similar problem and found the solution.
I used the solution proposed by @t0r0X but it does not work well in the current implementation in Java 11, the method xmlEvent.writeAsEncodedUnicode
creates the invalid string representation of the start element (in the StartElementEvent
class) in the result XML fragment, so I had to modify it, but then it seems to work well, what I could immediatelly verify by the parsing of the fragment by DOM and JaxBMarshaller to specific data containers.
In my case I had the huge structure
<Orders>
<ns2:SyncOrder xmlns:ns2="..." xmlns:ns3="....." ....>
.....
</ns2:SyncOrder>
<ns2:SyncOrder xmlns:ns2="..." xmlns:ns3="....." ....>
.....
</ns2:SyncOrder>
...
</Orders>
in the file of multiple hundred megabytes (a lot of repeating "SyncOrder" structures), so the usage of DOM would lead to a large memory consumption and slow evaluation. Therefore I used the StAX to split the huge XML to smaller XML pieces, which I have analyzed with DOM and used the JaxbElements generated from the xsd definition of the element SyncOrder
(This infrastructure I had from the webservice, which uses the same structure, but it is not important).
In this code there can be seen Where the XML fragment has een created and could be used, I used it directly in other processing...
private static <T> List<T> unmarshallMultipleSyncOrderXmlData(
InputStream aOrdersXmlContainingSyncOrderItems,
Function<SyncOrderType, T> aConversionFunction) throws XMLStreamException, ParserConfigurationException, IOException, SAXException {
DocumentBuilderFactory locDocumentBuilderFactory = DocumentBuilderFactory.newInstance();
locDocumentBuilderFactory.setNamespaceAware(true);
DocumentBuilder locDocBuilder = locDocumentBuilderFactory.newDocumentBuilder();
List<T> locResult = new ArrayList<>();
XMLInputFactory locFactory = XMLInputFactory.newFactory();
XMLEventReader locReader = locFactory.createXMLEventReader(aOrdersXmlContainingSyncOrderItems);
boolean locIsInSyncOrder = false;
QName locSyncOrderElementQName = null;
StringWriter locXmlTextBuffer = new StringWriter();
int locDepth = 0;
while (locReader.hasNext()) {
XMLEvent locEvent = locReader.nextEvent();
if (locEvent.isStartElement()) {
if (locDepth == 0 && Objects.equals(locEvent.asStartElement().getName().getLocalPart(), "Orders")) {
locDepth++;
} else {
if (locDepth <= 0)
throw new IllegalStateException("There has been passed invalid XML stream intot he function. "
+ "Expecting the element 'Orders' as the root alament of the document, but found was '"
+ locEvent.asStartElement().getName().getLocalPart() + "'.");
locDepth++;
if (locSyncOrderElementQName == null) {
/* First element after the "Orders" has passed, so we retrieve
* the name of the element with the namespace prefix: */
locSyncOrderElementQName = locEvent.asStartElement().getName();
}
if(Objects.equals(locEvent.asStartElement().getName(), locSyncOrderElementQName)) {
locIsInSyncOrder = true;
}
}
} else if (locEvent.isEndElement()) {
locDepth--;
if(locDepth == 1 && Objects.equals(locEvent.asEndElement().getName(), locSyncOrderElementQName)) {
locEvent.writeAsEncodedUnicode(locXmlTextBuffer);
/* at this moment the call of locXmlTextBuffer.toString() gets the complete fragment
* of XML containing the valid SyncOrder element, but I have continued to other processing,
* which immediatelly validates the produced XML fragment is valid and passes the values
* to communication object: */
Document locDocument = locDocBuilder.parse(new ByteArrayInputStream(locXmlTextBuffer.toString().getBytes()));
SyncOrderType locItem = unmarshallSyncOrderDomNodeToCo(locDocument);
locResult.add(aConversionFunction.apply(locItem));
locXmlTextBuffer = new StringWriter();
locIsInSyncOrder = false;
}
}
if (locIsInSyncOrder) {
if (locEvent.isStartElement()) {
/* here replaced the standard implementation of startElement's method writeAsEncodedUnicode: */
locXmlTextBuffer.write(startElementToStrng(locEvent.asStartElement()));
} else {
locEvent.writeAsEncodedUnicode(locXmlTextBuffer);
}
}
}
return locResult;
}
private static String startElementToString(StartElement aStartElement) {
StringBuilder locStartElementBuffer = new StringBuilder();
// open element
locStartElementBuffer.append("<");
String locNameAsString = null;
if ("".equals(aStartElement.getName().getNamespaceURI())) {
locNameAsString = aStartElement.getName().getLocalPart();
} else if (aStartElement.getName().getPrefix() != null
&& !"".equals(aStartElement.getName().getPrefix())) {
locNameAsString = aStartElement.getName().getPrefix()
+ ":" + aStartElement.getName().getLocalPart();
} else {
locNameAsString = aStartElement.getName().getLocalPart();
}
locStartElementBuffer.append(locNameAsString);
// add any attributes
Iterator<Attribute> locAttributeIterator = aStartElement.getAttributes();
Attribute attr;
while (locAttributeIterator.hasNext()) {
attr = locAttributeIterator.next();
locStartElementBuffer.append(" ");
locStartElementBuffer.append(attributeToString(attr));
}
// add any namespaces
Iterator<Namespace> locNamespaceIterator = aStartElement.getNamespaces();
Namespace locNamespace;
while (locNamespaceIterator.hasNext()) {
locNamespace = locNamespaceIterator.next();
locStartElementBuffer.append(" ");
locStartElementBuffer.append(attributeToString(locNamespace));
}
// close start tag
locStartElementBuffer.append(">");
// return StartElement as a String
return locStartElementBuffer.toString();
}
private static String attributeToString(Attribute aAttr) {
if( aAttr.getName().getPrefix() != null && aAttr.getName().getPrefix().length() > 0 )
return aAttr.getName().getPrefix() + ":" + aAttr.getName().getLocalPart() + "='" + aAttr.getValue() + "'";
else
return aAttr.getName().getLocalPart() + "='" + aAttr.getValue() + "'";
}
public static SyncOrderType unmarshallSyncOrderDomNodeToCo(
Node aSyncOrderItemNode) {
Source locSource = new DOMSource(aSyncOrderItemNode);
Object locUnmarshalledObject = getMarshallerAndUnmarshaller().unmarshal(locSource);
SyncOrderType locCo = ((JAXBElement<SyncOrderType>) locUnmarshalledObject).getValue();
return locCo;
}