Here's a sample `Bookmark` class that I created to demonstrate how you can deal with bookmarks. It finds pairs of `w:bookmarkStart` and `w:bookmarkEnd` elements and shows how you can get hold of the `w:r` elements between those two markers. Based on that, you can process the text, e.g., as shown in the `GetValue()` method.
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using OpenXmlPowerTools;
namespace CodeSnippets.OpenXml.Wordprocessing
{
    /// <summary>
    /// Represents a corresponding pair of w:bookmarkStart and w:bookmarkEnd elements.
    /// </summary>
    /// <remarks>
    /// Instances are obtained through <see cref="Find"/>. When no matching pair exists
    /// in the searched tree, the instance holds detached placeholder elements instead;
    /// <see cref="GetRuns"/> then yields no runs because the placeholders are never
    /// encountered while walking <see cref="Root"/>.
    /// </remarks>
    public class Bookmark
    {
        /// <summary>
        /// Creates a placeholder bookmark for a name that was not found in
        /// <paramref name="root"/>. The marker elements are newly created, carry the
        /// w:id value -1, and are not inserted into the tree.
        /// </summary>
        /// <param name="root">The root element that was searched.</param>
        /// <param name="bookmarkName">The bookmark name that was searched for.</param>
        private Bookmark(XElement root, string bookmarkName)
        {
            Root = root;
            BookmarkStart = new XElement(W.bookmarkStart,
                new XAttribute(W.id, -1),
                new XAttribute(W.name, bookmarkName));
            BookmarkEnd = new XElement(W.bookmarkEnd,
                new XAttribute(W.id, -1));
        }

        /// <summary>
        /// Creates a bookmark from an existing pair of marker elements.
        /// </summary>
        /// <param name="root">The root element containing both markers.</param>
        /// <param name="bookmarkStart">The w:bookmarkStart element.</param>
        /// <param name="bookmarkEnd">The w:bookmarkEnd element.</param>
        private Bookmark(XElement root, XElement bookmarkStart, XElement bookmarkEnd)
        {
            Root = root;
            BookmarkStart = bookmarkStart;
            BookmarkEnd = bookmarkEnd;
        }

        /// <summary>
        /// The root element containing both <see cref="BookmarkStart"/> and
        /// <see cref="BookmarkEnd"/>.
        /// </summary>
        public XElement Root { get; }

        /// <summary>
        /// The w:bookmarkStart element.
        /// </summary>
        public XElement BookmarkStart { get; }

        /// <summary>
        /// The w:bookmarkEnd element.
        /// </summary>
        public XElement BookmarkEnd { get; }

        /// <summary>
        /// Finds a pair of w:bookmarkStart and w:bookmarkEnd elements in the given
        /// <paramref name="root"/> element, where the w:name attribute value of the
        /// w:bookmarkStart element is equal to <paramref name="bookmarkName"/>.
        /// </summary>
        /// <param name="root">The root <see cref="XElement"/>.</param>
        /// <param name="bookmarkName">The bookmark name.</param>
        /// <returns>
        /// A new <see cref="Bookmark"/> instance representing the bookmark. If no
        /// w:bookmarkStart with that name exists, if it has no w:id attribute, or if
        /// no w:bookmarkEnd with the same w:id exists, the returned instance holds
        /// detached placeholder elements.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="root"/> or <paramref name="bookmarkName"/> is null.
        /// </exception>
        public static Bookmark Find(XElement root, string bookmarkName)
        {
            // Validate up front. Without these guards, a null root surfaces as a bare
            // NullReferenceException, and a null name would match any w:bookmarkStart
            // that lacks a w:name attribute (the string cast of a missing attribute
            // is null).
            if (root == null)
            {
                throw new System.ArgumentNullException(nameof(root));
            }

            if (bookmarkName == null)
            {
                throw new System.ArgumentNullException(nameof(bookmarkName));
            }

            XElement bookmarkStart = root
                .Descendants(W.bookmarkStart)
                .FirstOrDefault(e => (string) e.Attribute(W.name) == bookmarkName);

            // The w:id value is what links the start marker to its end marker.
            string id = bookmarkStart?.Attribute(W.id)?.Value;
            if (id == null)
            {
                return new Bookmark(root, bookmarkName);
            }

            XElement bookmarkEnd = root
                .Descendants(W.bookmarkEnd)
                .FirstOrDefault(e => (string) e.Attribute(W.id) == id);

            return bookmarkEnd != null
                ? new Bookmark(root, bookmarkStart, bookmarkEnd)
                : new Bookmark(root, bookmarkName);
        }

        /// <summary>
        /// Gets all w:r elements between the bookmark's w:bookmarkStart and
        /// w:bookmarkEnd elements.
        /// </summary>
        /// <remarks>
        /// The descendants of <see cref="Root"/> are walked in document order; the
        /// markers are located by reference, and neither marker is part of the result.
        /// The query is evaluated lazily, each time the result is enumerated.
        /// </remarks>
        /// <returns>A collection of w:r elements.</returns>
        public IEnumerable<XElement> GetRuns()
        {
            return Root
                .Descendants()
                .SkipWhile(d => d != BookmarkStart)
                .Skip(1) // Step over the w:bookmarkStart element itself.
                .TakeWhile(d => d != BookmarkEnd)
                .Where(d => d.Name == W.r);
        }

        /// <summary>
        /// Gets the concatenated inner text of all runs between the bookmark's
        /// w:bookmarkStart and w:bookmarkEnd elements.
        /// </summary>
        /// <remarks>
        /// Only the w:r elements returned by <see cref="GetRuns"/> are converted, so
        /// paragraph boundaries contribute nothing to the result. How the content of
        /// an individual run is rendered is determined by
        /// <c>UnicodeMapper.RunToString</c> from the Open-Xml-PowerTools.
        /// For plain text runs, the output can be compared to the output of the
        /// <see cref="XElement.Value"/> property.
        /// </remarks>
        /// <returns>The concatenated inner text.</returns>
        public string GetValue()
        {
            return GetRuns().Select(UnicodeMapper.RunToString).StringConcatenate();
        }
    }
}
The above class processes documents like the following (very simple test document):
<?xml version="1.0" encoding="utf-8"?>
<!-- Minimal main document part: four one-run paragraphs, with one bookmark
     wrapped around the second and third paragraphs. -->
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<!-- Before the bookmark. -->
<w:p>
<w:r>
<w:t>First</w:t>
</w:r>
</w:p>
<!-- Start marker; w:id="1" pairs it with the w:bookmarkEnd below. -->
<w:bookmarkStart w:id="1" w:name="_Bm001" />
<w:p>
<w:r>
<w:t>Second</w:t>
</w:r>
</w:p>
<w:p>
<w:r>
<w:t>Third</w:t>
</w:r>
</w:p>
<!-- End marker of bookmark "_Bm001" (same w:id as the start marker). -->
<w:bookmarkEnd w:id="1" />
<!-- After the bookmark. -->
<w:p>
<w:r>
<w:t>Fourth</w:t>
</w:r>
</w:p>
</w:body>
</w:document>
The above document is created by the following unit tests, which demonstrate how you could use the `Bookmark` class:
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Linq;
using CodeSnippets.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;
using Xunit;
namespace CodeSnippets.Tests.OpenXml.Wordprocessing
{
    /// <summary>
    /// Unit tests demonstrating how to locate a bookmark in the main document part
    /// of a Word document and read the runs and text it encloses.
    /// </summary>
    public class BookmarkTests
    {
        /// <summary>
        /// The w:name value of our bookmark.
        /// </summary>
        private const string BookmarkName = "_Bm001";

        /// <summary>
        /// The w:id value of our bookmark.
        /// </summary>
        private const int BookmarkId = 1;

        /// <summary>
        /// The test w:document with our bookmark, which encloses the two runs
        /// with inner texts "Second" and "Third".
        /// </summary>
        private static readonly XElement Document =
            new XElement(W.document,
                new XAttribute(XNamespace.Xmlns + "w", W.w.NamespaceName),
                new XElement(W.body,
                    new XElement(W.p,
                        new XElement(W.r,
                            new XElement(W.t, "First"))),
                    new XElement(W.bookmarkStart,
                        new XAttribute(W.id, BookmarkId),
                        new XAttribute(W.name, BookmarkName)),
                    new XElement(W.p,
                        new XElement(W.r,
                            new XElement(W.t, "Second"))),
                    new XElement(W.p,
                        new XElement(W.r,
                            new XElement(W.t, "Third"))),
                    new XElement(W.bookmarkEnd,
                        new XAttribute(W.id, BookmarkId)),
                    new XElement(W.p,
                        new XElement(W.r,
                            new XElement(W.t, "Fourth")))
                )
            );

        /// <summary>
        /// Creates a <see cref="WordprocessingDocument"/> on a <see cref="MemoryStream"/>
        /// for testing purposes, using a copy of the given <paramref name="document"/>
        /// as the w:document root element of the main document part.
        /// </summary>
        /// <param name="document">The w:document root element.</param>
        /// <returns>
        /// The <see cref="MemoryStream"/> containing the <see cref="WordprocessingDocument"/>.
        /// The caller owns the stream and is responsible for disposing it.
        /// </returns>
        private static MemoryStream CreateWordprocessingDocument(XElement document)
        {
            var stream = new MemoryStream();
            const WordprocessingDocumentType type = WordprocessingDocumentType.Document;

            using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(stream, type))
            {
                MainDocumentPart part = wordDocument.AddMainDocumentPart();

                // Store a deep copy. LINQ to XML attaches (rather than clones) an
                // element that has no parent yet, which would tie the shared static
                // fixture to the first document created from it.
                part.PutXDocument(new XDocument(new XElement(document)));
            }

            return stream;
        }

        [Fact]
        public void GetRuns_WordprocessingDocumentWithBookmarks_CorrectRunsReturned()
        {
            // Arrange.
            // Create a new Word document on a Stream, using the test w:document
            // as the main document part. The using declarations dispose the
            // WordprocessingDocument first and the underlying stream afterwards.
            using Stream stream = CreateWordprocessingDocument(Document);

            // Open the WordprocessingDocument on the Stream, using the Open XML SDK.
            using WordprocessingDocument wordDocument = WordprocessingDocument.Open(stream, true);

            // Get the w:document element from the main document part and find
            // our bookmark.
            XElement document = wordDocument.MainDocumentPart.GetXElement();
            Bookmark bookmark = Bookmark.Find(document, BookmarkName);

            // Act, getting the bookmarked runs.
            IEnumerable<XElement> runs = bookmark.GetRuns();

            // Assert.
            Assert.Equal(new[] {"Second", "Third"}, runs.Select(run => run.Value));
        }

        [Fact]
        public void GetText_WordprocessingDocumentWithBookmarks_CorrectRunsReturned()
        {
            // Arrange.
            // Create a new Word document on a Stream, using the test w:document
            // as the main document part. The using declarations dispose the
            // WordprocessingDocument first and the underlying stream afterwards.
            using Stream stream = CreateWordprocessingDocument(Document);

            // Open the WordprocessingDocument on the Stream, using the Open XML SDK.
            using WordprocessingDocument wordDocument = WordprocessingDocument.Open(stream, true);

            // Get the w:document element from the main document part and find
            // our bookmark.
            XElement document = wordDocument.MainDocumentPart.GetXElement();
            Bookmark bookmark = Bookmark.Find(document, BookmarkName);

            // Act, getting the concatenated text contents of the bookmarked runs.
            string text = bookmark.GetValue();

            // Assert.
            Assert.Equal("SecondThird", text);
        }
    }
}
You can find the full code example in my CodeSnippets GitHub repository. Look for the `Bookmark` and `BookmarkTests` classes, and note that I'm using the Open-Xml-PowerTools library.
You can obviously do more complicated things with those Open XML elements; this is just a simple example.