So, I have some observations about the Qi version.
Also created an X3 version.
Finally, wrote a manual expansion function that beats all the other candidates (I expect it to be faster than MFC, because it doesn't bother with repeated deletes/inserts).
Skip to the benchmark charts if you want.
About the Qi version
- yes, symbol tables suffer from locality issues of node-based containers. They might not be the best match you can use here.
- There's no need to rebuild the symbols each loop:
Instead of character-wise skipping of non-symbols, scan until the next:
+(bsq::char_ - symbols)
inline std::string spirit_qi(const std::string& input, bsq::symbols<char, std::string> const& symbols)
{
std::string retVal;
retVal.reserve(input.size() * 2);
auto beg = input.cbegin();
auto end = input.cend();
if(!bsq::parse(beg, end, *(symbols | +(bsq::char_ - symbols)), retVal))
retVal = input;
return retVal;
}
This is considerably faster already. But:
Manual Loops
In this trivial example, why don't you just do the parsing manually?
inline std::string manual_expand(const std::string& input, TokenMap const& tokens)
{
std::ostringstream builder;
auto expand = [&](auto const& key) {
auto match = tokens.find(key);
if (match == tokens.end())
builder << "$(" << key << ")";
else
builder << match->second;
};
builder.str().reserve(input.size()*2);
builder.str("");
std::ostreambuf_iterator<char> out(builder);
for(auto f(input.begin()), l(input.end()); f != l;) {
switch(*f) {
case '$' : {
if (++f==l || *f!='(') {
*out++ = '$';
break;
}
else {
auto s = ++f;
size_t n = 0;
while (f!=l && *f != ')')
++f, ++n;
// key is [s,f] now
expand(std::string(&*s, &*s+n));
if (f!=l)
++f; // skip '}'
}
}
default:
*out++ = *f++;
}
}
return builder.str();
}
This is vastly superior in performance on my machine.
Other ideas
You could look at Boost Spirit Lex, potentially with statically generated token tables: http://www.boost.org/doc/libs/1_60_0/libs/spirit/doc/html/spirit/lex/abstracts/lexer_static_model.html. I'm not particularly fond of Lex though.
COMPARISONS:
See the Interactive Chart
That's using Nonius for the benchmarking statistics.
Full Benchmark code: http://paste.ubuntu.com/14133072/
#include <boost/container/flat_map.hpp>
#define USE_X3
#ifdef USE_X3
# include <boost/spirit/home/x3.hpp>
#else
# include <boost/spirit/include/qi.hpp>
#endif
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/searching/boyer_moore.hpp>
#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
#include <boost/algorithm/searching/knuth_morris_pratt.hpp>
#include <string>
#include <unordered_map>
#include <iostream>
#include <fstream>
#include <nonius/benchmark.h++>
#include <nonius/main.h++>
using TokenMap = boost::container::flat_map<std::string, std::string>;
#ifdef USE_X3
namespace x3 = boost::spirit::x3;
struct append {
std::string& out;
void do_append(char const ch) const { out += ch; }
void do_append(std::string const& s) const { out += s; }
template<typename It>
void do_append(boost::iterator_range<It> const& r) const { out.append(r.begin(), r.end()); }
template<typename Ctx>
void operator()(Ctx& ctx) const { do_append(_attr(ctx)); }
};
inline std::string spirit_x3(const std::string& input, x3::symbols<char const*> const& symbols)
{
std::string retVal;
retVal.reserve(input.size() * 2);
append appender { retVal };
auto beg = input.cbegin();
auto end = input.cend();
auto rule = *(symbols[appender] | x3::char_ [appender]);
if(!x3::parse(beg, end, rule))
retVal = input;
return retVal;
}
#else
namespace bsq = boost::spirit::qi;
inline std::string spirit_qi_old(const std::string& input, TokenMap const& tokens)
{
std::string retVal;
retVal.reserve(input.size() * 2);
bsq::symbols<char const, char const*> symbols;
for(const auto& token : tokens) {
symbols.add(token.first.c_str(), token.second.c_str());
}
auto beg = input.cbegin();
auto end = input.cend();
if(!bsq::parse(beg, end, *(symbols | bsq::char_), retVal))
retVal = input;
return retVal;
}
inline std::string spirit_qi(const std::string& input, bsq::symbols<char, std::string> const& symbols)
{
std::string retVal;
retVal.reserve(input.size() * 2);
auto beg = input.cbegin();
auto end = input.cend();
if(!bsq::parse(beg, end, *(symbols | +(bsq::char_ - symbols)), retVal))
retVal = input;
return retVal;
}
#endif
inline std::string manual_expand(const std::string& input, TokenMap const& tokens) {
std::ostringstream builder;
auto expand = [&](auto const& key) {
auto match = tokens.find(key);
if (match == tokens.end())
builder << "$(" << key << ")";
else
builder << match->second;
};
builder.str().reserve(input.size()*2);
std::ostreambuf_iterator<char> out(builder);
for(auto f(input.begin()), l(input.end()); f != l;) {
switch(*f) {
case '$' : {
if (++f==l || *f!='(') {
*out++ = '$';
break;
}
else {
auto s = ++f;
size_t n = 0;
while (f!=l && *f != ')')
++f, ++n;
// key is [s,f] now
expand(std::string(&*s, &*s+n));
if (f!=l)
++f; // skip '}'
}
}
default:
*out++ = *f++;
}
}
return builder.str();
}
inline std::string boost_replace_all(const std::string& input, TokenMap const& tokens)
{
std::string retVal(input);
retVal.reserve(input.size() * 2);
for(const auto& token : tokens)
{
boost::replace_all(retVal, token.first, token.second);
}
return retVal;
}
inline void naive_stl(std::string& input, TokenMap const& tokens)
{
input.reserve(input.size() * 2);
for(const auto& token : tokens)
{
auto next = std::search(input.cbegin(), input.cend(), token.first.begin(), token.first.end());
while(next != input.cend())
{
input.replace(next, next + token.first.size(), token.second);
next = std::search(input.cbegin(), input.cend(), token.first.begin(), token.first.end());
}
}
}
inline void boyer_more(std::string& input, TokenMap const& tokens)
{
input.reserve(input.size() * 2);
for(const auto& token : tokens)
{
auto next =
boost::algorithm::boyer_moore_search(input.cbegin(), input.cend(), token.first.begin(), token.first.end());
while(next != input.cend())
{
input.replace(next, next + token.first.size(), token.second);
next = boost::algorithm::boyer_moore_search(input.cbegin(), input.cend(), token.first.begin(),
token.first.end());
}
}
}
inline void bmh_search(std::string& input, TokenMap const& tokens)
{
input.reserve(input.size() * 2);
for(const auto& token : tokens)
{
auto next = boost::algorithm::boyer_moore_horspool_search(input.cbegin(), input.cend(), token.first.begin(),
token.first.end());
while(next != input.cend())
{
input.replace(next, next + token.first.size(), token.second);
next = boost::algorithm::boyer_moore_search(input.cbegin(), input.cend(), token.first.begin(),
token.first.end());
}
}
}
inline void kmp_search(std::string& input, TokenMap const& tokens)
{
input.reserve(input.size() * 2);
for(const auto& token : tokens)
{
auto next = boost::algorithm::knuth_morris_pratt_search(input.cbegin(), input.cend(), token.first.begin(),
token.first.end());
while(next != input.cend())
{
input.replace(next, next + token.first.size(), token.second);
next = boost::algorithm::boyer_moore_search(input.cbegin(), input.cend(), token.first.begin(),
token.first.end());
}
}
}
namespace testdata {
std::string const expected =
"Five and Seven said nothing, but looked at Two. Two began in a low voice, 'Why the fact is, you see, Miss, "
"this here ought to have been a red rose-tree, and we put a white one in by mistake; and if the Queen was to "
"find it out, we should all have our heads cut off, you know. So you see, Miss, we're doing our best, afore "
"she comes, to—' At this moment Five, who had been anxiously looking across the garden, called out 'The Queen! "
"The Queen!' and the three gardeners instantly threw themselves flat upon their faces. There was a sound of "
"many footsteps, and Alice looked round, eager to see the Queen.First came ten soldiers carrying clubs; these "
"were all shaped like the three gardeners, oblong and flat, with their hands and feet at the corners: next the "
"ten courtiers; these were ornamented all over with diamonds, and walked two and two, as the soldiers did. "
"After these came the royal children; there were ten of them, and the little dears came jumping merrily along "
"hand in hand, in couples: they were all ornamented with hearts. Next came the guests, mostly Kings and "
"Queens, and among them Alice recognised the White Rabbit: it was talking in a hurried nervous manner, smiling "
"at everything that was said, and went by without noticing her. Then followed the Knave of Hearts, carrying "
"the King's crown on a crimson velvet cushion; and, last of all this grand procession, came THE KING AND QUEEN "
"OF HEARTS.Alice was rather doubtful whether she ought not to lie down on her face like the three gardeners, "
"but she could not remember ever having heard of such a rule at processions; 'and besides, what would be the "
"use of a procession,' thought she, 'if people had all to lie down upon their faces, so that they couldn't see "
"it?' So she stood still where she was, and waited.When the procession came opposite to Alice, they all "
"stopped and looked at her, and the Queen said severely 'Who is this?' She said it to the Knave of Hearts, who "
"only bowed and smiled in reply.'Idiot!' said the Queen, tossing her head impatiently; and, turning to Alice, "
"she went on, 'What's your name, child?''My name is Alice, so please your Majesty,' said Alice very politely; "
"but she added, to herself, 'Why, they're only a pack of cards, after all. I needn't be afraid of them!''And "
"who are these?' said the Queen, pointing to the three gardeners who were lying round the rosetree; for, you "
"see, as they were lying on their faces, and the pattern on their backs was the same as the rest of the pack, "
"she could not tell whether they were gardeners, or soldiers, or courtiers, or three of her own children.'How "
"should I know?' said Alice, surprised at her own courage. 'It's no business of mine.'The Queen turned crimson "
"with fury, and, after glaring at her for a moment like a wild beast, screamed 'Off with her head! "
"Off—''Nonsense!' said Alice, very loudly and decidedly, and the Queen was silent.The King laid his hand upon "
"her arm, and timidly said 'Consider, my dear: she is only a child!'The Queen turned angrily away from him, "
"and said to the Knave 'Turn them over!'The Knave did so, very carefully, with one foot.'Get up!' said the "
"Queen, in a shrill, loud voice, and the three gardeners instantly jumped up, and began bowing to the King, "
"the Queen, the royal children, and everybody else.'Leave off that!' screamed the Queen. 'You make me giddy.' "
"And then, turning to the rose-tree, she went on, 'What have you been doing here?'";
std::string const inputWithtokens =
"Five and Seven said nothing, but looked at $(Two). $(Two) began in a low voice, 'Why the fact is, you see, "
"Miss, "
"this here ought to have been a red rose-tree, and we put a white one in by mistake; and if the Queen was to "
"find it out, we should all have our $(heads) cut off, you know. So you see, Miss, we're doing our best, afore "
"she comes, to—' At this moment Five, who had been anxiously looking across the garden, called out 'The Queen! "
"The Queen!' and the three gardeners instantly threw themselves flat upon their faces. There was a sound of "
"many footsteps, and Alice looked round, eager to see the $(Queen).First came ten soldiers carrying clubs; "
"these "
"were all shaped like the three gardeners, oblong and flat, with their hands and feet at the corners: next the "
"ten courtiers; these were ornamented all over with $(diamonds), and walked two and two, as the soldiers did. "
"After these came the royal children; there were ten of them, and the little dears came jumping merrily along "
"hand in hand, in couples: they were all ornamented with hearts. Next came the guests, mostly Kings and "
"Queens, and among them Alice recognised the White Rabbit: it was talking in a hurried nervous manner, smiling "
"at everything that was said, and went by without noticing her. Then followed the Knave of Hearts, carrying "
"the King's crown on a crimson velvet cushion; and, last of all this grand procession, came THE KING AND QUEEN "
"OF HEARTS.Alice was rather doubtful whether she ought not to lie down on her face like the three gardeners, "
"but she could not remember ever having heard of such a rule at processions; 'and besides, what would be the "
"use of a procession,' thought she, 'if people had all to lie down upon their faces, so that they couldn't see "
"it?' So she stood still where she was, and waited.When the procession came opposite to Alice, they all "
"stopped and looked at her, and the $(Queen) said severely 'Who is this?' She said it to the Knave of Hearts, "
"who "
"only bowed and smiled in reply.'Idiot!' said the Queen, tossing her head impatiently; and, turning to Alice, "
"she went on, 'What's your name, child?''My name is Alice, so please your Majesty,' said Alice very politely; "
"but she added, to herself, 'Why, they're only a pack of cards, after all. I needn't be afraid of them!''And "
"who are these?' said the $(Queen), pointing to the three gardeners who were lying round the rosetree; for, "
"you "
"see, as they were lying on their faces, and the $(pattern) on their backs was the same as the rest of the "
"pack, "
"she could not tell whether they were gardeners, or soldiers, or courtiers, or three of her own children.'How "
"should I know?' said Alice, surprised at her own courage. 'It's no business of mine.'The Queen turned crimson "
"with fury, and, after glaring at her for a moment like a wild beast, screamed 'Off with her head! "
"Off—''Nonsense!' said $(Alice), very loudly and decidedly, and the Queen was silent.The $(King) laid his hand "
"upon "
"her arm, and timidly said 'Consider, my dear: she is only a child!'The $(Queen) turned angrily away from him, "
"and said to the $(Knave) 'Turn them over!'The $(Knave) did so, very carefully, with one foot.'Get up!' said "
"the "
"Queen, in a shrill, loud voice, and the three gardeners instantly jumped up, and began bowing to the King, "
"the Queen, the royal children, and everybody else.'Leave off that!' screamed the Queen. 'You make me giddy.' "
"And then, turning to the rose-tree, she went on, 'What have you been doing here?'";
static TokenMap const raw_tokens {
{"Two", "Two"}, {"heads", "heads"},
{"diamonds", "diamonds"}, {"Queen", "Queen"},
{"pattern", "pattern"}, {"Alice", "Alice"},
{"King", "King"}, {"Knave", "Knave"},
{"Why", "Why"}, {"glaring", "glaring"},
{"name", "name"}, {"know", "know"},
{"Idiot", "Idiot"}, {"children", "children"},
{"Nonsense", "Nonsense"}, {"procession", "procession"},
};
static TokenMap const tokens {
{"$(Two)", "Two"}, {"$(heads)", "heads"},
{"$(diamonds)", "diamonds"}, {"$(Queen)", "Queen"},
{"$(pattern)", "pattern"}, {"$(Alice)", "Alice"},
{"$(King)", "King"}, {"$(Knave)", "Knave"},
{"$(Why)", "Why"}, {"$(glaring)", "glaring"},
{"$(name)", "name"}, {"$(know)", "know"},
{"$(Idiot)", "Idiot"}, {"$(children)", "children"},
{"$(Nonsense)", "Nonsense"}, {"$(procession)", "procession"},
};
}
NONIUS_BENCHMARK("manual_expand", [](nonius::chronometer cm) {
std::string const tmp = testdata::inputWithtokens;
auto& tokens = testdata::raw_tokens;
std::string result;
cm.measure([&](int) {
result = manual_expand(tmp, tokens);
});
assert(result == testdata::expected);
})
#ifdef USE_X3
NONIUS_BENCHMARK("spirit_x3", [](nonius::chronometer cm) {
auto const symbols = [&] {
x3::symbols<char const*> symbols;
for(const auto& token : testdata::tokens) {
symbols.add(token.first.c_str(), token.second.c_str());
}
return symbols;
}();
std::string result;
cm.measure([&](int) {
result = spirit_x3(testdata::inputWithtokens, symbols);
});
//std::cout << "====\n" << result << "\n====\n";
assert(testdata::expected == result);
})
#else
NONIUS_BENCHMARK("spirit_qi", [](nonius::chronometer cm) {
auto const symbols = [&] {
bsq::symbols<char, std::string> symbols;
for(const auto& token : testdata::tokens) {
symbols.add(token.first.c_str(), token.second.c_str());
}
return symbols;
}();
std::string result;
cm.measure([&](int) {
result = spirit_qi(testdata::inputWithtokens, symbols);
});
assert(testdata::expected == result);
})
NONIUS_BENCHMARK("spirit_qi_old", [](nonius::chronometer cm) {
std::string result;
cm.measure([&](int) {
result = spirit_qi_old(testdata::inputWithtokens, testdata::tokens);
});
assert(testdata::expected == result);
})
#endif
NONIUS_BENCHMARK("boyer_more", [](nonius::chronometer cm) {
cm.measure([&](int) {
std::string tmp = testdata::inputWithtokens;
boyer_more(tmp, testdata::tokens);
assert(tmp == testdata::expected);
});
})
NONIUS_BENCHMARK("bmh_search", [](nonius::chronometer cm) {
cm.measure([&](int) {
std::string tmp = testdata::inputWithtokens;
bmh_search(tmp, testdata::tokens);
assert(tmp == testdata::expected);
});
})
NONIUS_BENCHMARK("kmp_search", [](nonius::chronometer cm) {
cm.measure([&](int) {
std::string tmp = testdata::inputWithtokens;
kmp_search(tmp, testdata::tokens);
assert(tmp == testdata::expected);
});
})
NONIUS_BENCHMARK("naive_stl", [](nonius::chronometer cm) {
cm.measure([&](int) {
std::string tmp = testdata::inputWithtokens;
naive_stl(tmp, testdata::tokens);
assert(tmp == testdata::expected);
});
})
NONIUS_BENCHMARK("boost_replace_all", [](nonius::chronometer cm) {
std::string const tmp = testdata::inputWithtokens;
std::string result;
cm.measure([&](int) {
result = boost_replace_all(testdata::inputWithtokens, testdata::tokens);
});
assert(result == testdata::expected);
})
CString
? Is it your class? It is not a standard C++ class. – Endpaper./test -r html -o stats.html
. The pngs are just screen grabs. The results of those timings are already here – Belonging