Okay, risking conflating too many things in an example, here goes:
// Example grammar: the input is a quoted string, a bare word followed by ';',
// or two integers. Sequences are joined with `>` so that, once the first
// element of a branch has matched, a later mismatch is reported as an
// x3::expectation_failure rather than as a silent fall-through.
namespace square::peg {
using namespace x3;
// '"', any run of printable characters other than '"', then the closing '"'.
const auto quoted_string = lexeme['"' > *(print - '"') > '"'];
// A letter followed by letters/digits, then a mandatory ';'.
const auto bare_string = lexeme[alpha > *alnum] > ';';
// Two integers; the second one is expected once the first has matched.
const auto two_ints = int_ > int_;
// The three alternatives, tried in this order.
const auto main = quoted_string | bare_string | two_ints;
// Top-level parser: skips whitespace, expects one `main` match and then end of input.
const auto entry_point = skip(space)[ expect[main] > eoi ];
} // namespace square::peg
That should do. The key is that the only things that should be expectation
points are things that make the respective branch fail BEYOND the point where it
was unambiguously the right branch. (Otherwise, there would literally not be a
hard expectation).
With two minor get_info
specializations for prettier messages¹, this could lead
to decent error messages even when manually catching the exception:
Live On Coliru
/// Runs the grammar over a fixed list of sample inputs.
///
/// For each input it prints either the parse outcome or, when an expectation
/// point is violated, what was expected together with a marker line pointing
/// at the offending position in the input.
int main() {
    using It = std::string::const_iterator;

    for (std::string const input : {
             " -89 0038 ",
             " \"-89 0038\" ",
             " something123123 ;",
             // undecidable
             "",
             // violate expectations, no successful parse
             " -89 oops ",   // not an integer
             " \"-89 0038 ", // missing "
             " bareword ",   // missing ;
             // trailing debris, successful "main"
             " -89 3.14 ",   // followed by .14
         })
    {
        std::cout << "====== " << std::quoted(input) << "\n";
        It iter = input.begin(), end = input.end();

        try {
            if (parse(iter, end, square::peg::entry_point)) {
                std::cout << "Parsed successfully\n";
            } else {
                std::cout << "Parsing failed\n";
            }
        } catch (x3::expectation_failure<It> const& ef) {
            auto const pos = std::distance(input.begin(), ef.where());

            // Build the run of dashes as a string rather than with
            // std::setw/std::setfill: the fill character is sticky and would
            // otherwise remain '-' on std::cout for all later padded output.
            std::string const dashes(
                pos > 0 ? static_cast<std::string::size_type>(pos) : 0, '-');

            std::cout << "Expect " << ef.which() << " at "
                      << "\n\t" << input
                      << "\n\t" << dashes << "^\n";
        }
    }
}
Prints
====== " -89 0038 "
Parsed successfully
====== " \"-89 0038\" "
Parsed successfully
====== " something123123 ;"
Parsed successfully
====== ""
Expect quoted string, bare string or integer number pair at
^
====== " -89 oops "
Expect integral number at
-89 oops
-------^
====== " \"-89 0038 "
Expect '"' at
"-89 0038
--------------^
====== " bareword "
Expect ';' at
bareword
------------^
====== " -89 3.14 "
Expect eoi at
-89 3.14
--------^
This is already beyond what most people expect from their parsers.
But: Automate That, Also, More Flexible
We might not be content reporting just the one expectation and bailing out. Indeed, you can report and continue parsing as if there were just a regular mismatch: this is where on_error
comes in.
Let's create a tag base:
/// Base for rule ID tags. A rule whose ID derives from this type reports a
/// violated expectation on std::cout and then fails like an ordinary mismatch
/// instead of letting the exception escape.
struct with_error_handling {
    /// Error hook called when an expectation inside the rule is violated.
    ///
    /// @param f   start of the range passed to the handler
    /// @param l   end of that range
    /// @param ef  the failure: which() names what was expected, where() is
    ///            the position at which it was expected
    /// @return    error_handler_result::fail, so the rule simply fails
    template<typename It, typename Ctx>
    x3::error_handler_result on_error(It f, It l, expectation_failure<It> const& ef, Ctx const&) const {
        std::string const s(f, l);
        auto const pos = std::distance(f, ef.where());

        // Build the dashes as a string instead of using std::setfill('-'):
        // the fill character is sticky and would stay set on std::cout,
        // changing the padding of any later std::setw output.
        std::string const dashes(
            pos > 0 ? static_cast<std::string::size_type>(pos) : 0, '-');

        std::cout << "Expecting " << ef.which() << " at "
                  << "\n\t" << s
                  << "\n\t" << dashes << "^\n";
        return error_handler_result::fail;
    }
};
Now, all we have to do is derive our rule ID from with_error_handling
and BAM!, we don't have to write any exception handlers, rules will simply "fail" with the appropriate diagnostics. What's more, some inputs can lead to multiple (hopefully helpful) diagnostics:
// Wraps parser `p` in a rule whose ID type `_` derives from
// with_error_handling, so the rule picks up that type's on_error hook.
auto const eh = [](auto p) {
struct _ : with_error_handling {};
return rule<_> {} = p;
};
// Same three branches as before, each now wrapped so that its expectation
// failures are reported by on_error and the branch then just fails.
const auto quoted_string = eh(lexeme['"' > *(print - '"') > '"']);
const auto bare_string = eh(lexeme[alpha > *alnum] > ';');
const auto two_ints = eh(int_ > int_);
const auto main = quoted_string | bare_string | two_ints;
// Type of `main` without cv-qualifiers; the full listing keys a get_info
// specialization on it.
using main_type = std::remove_cv_t<decltype(main)>;
// The top-level sequence is wrapped as well, so a failed expect[main] or a
// missing end of input is reported the same way.
const auto entry_point = skip(space)[ eh(expect[main] > eoi) ];
Now, main
becomes just:
Live On Coliru
// Feed every sample input to the entry-point parser. Diagnostics come from
// the rules' own error handlers, so only the overall verdict is printed here.
for (std::string const input : {
         " -89 0038 ",
         " \"-89 0038\" ",
         " something123123 ;",
         // undecidable
         "",
         // violate expectations, no successful parse
         " -89 oops ",   // not an integer
         " \"-89 0038 ", // missing "
         " bareword ",   // missing ;
         // trailing debris, successful "main"
         " -89 3.14 ",   // followed by .14
     })
{
    std::cout << "====== " << std::quoted(input) << "\n";

    It first = input.begin();
    It last = input.end();

    bool const matched = parse(first, last, square::peg::entry_point);
    std::cout << (matched ? "Parsed successfully\n" : "Parsing failed\n");
}
And the program prints:
====== " -89 0038 "
Parsed successfully
====== " \"-89 0038\" "
Parsed successfully
====== " something123123 ;"
Parsed successfully
====== ""
Expecting quoted string, bare string or integer number pair at
^
Parsing failed
====== " -89 oops "
Expecting integral number at
-89 oops
-------^
Expecting quoted string, bare string or integer number pair at
-89 oops
^
Parsing failed
====== " \"-89 0038 "
Expecting '"' at
"-89 0038
--------------^
Expecting quoted string, bare string or integer number pair at
"-89 0038
^
Parsing failed
====== " bareword "
Expecting ';' at
bareword
------------^
Expecting quoted string, bare string or integer number pair at
bareword
^
Parsing failed
====== " -89 3.14 "
Expecting eoi at
-89 3.14
--------^
Parsing failed
Attribute Propagation, on_success
Parsers aren't very useful when they don't actually parse anything, so let's add some constructive value handling, also showcasing on_success
:
Defining some AST types to receive the attributes:
// Distinct string types so the variant can tell a quoted string from a bare word.
struct quoted : std::string {};
struct bare : std::string {};
// Attribute of the two-integer branch.
using two_i = std::pair<int, int>;
// Result of a parse: exactly one of the three alternatives.
using Value = boost::variant<quoted, bare, two_i>;
Make sure we can print Value
s:
static inline std::ostream& operator<<(std::ostream& os, Value const& v) {
struct {
std::ostream& _os;
void operator()(quoted const& v) const { _os << "quoted(" << std::quoted(v) << ")"; }
void operator()(bare const& v) const { _os << "bare(" << v << ")"; }
void operator()(two_i const& v) const { _os << "two_i(" << v.first << ", " << v.second << ")"; }
} vis{os};
boost::apply_visitor(vis, v);
return os;
}
Now, use the old as<>
trick to coerce attribute types, this time with error-handling:
As icing on the cake, let's demonstrate on_success
in with_error_handling
:
/// Success hook for a rule whose attribute is a two_i: prints the matched
/// source text (quoted) followed by both parsed integers.
template<typename It, typename Ctx>
void on_success(It f, It l, two_i const& v, Ctx const&) const {
    std::string const matched(f, l);
    std::cout << "Parsed " << std::quoted(matched) << " as integer pair "
              << v.first << ", " << v.second << "\n";
}
Now with largely unmodified main program (just prints the result value as well):
Live On Coliru
It iter = input.begin(), end = input.end();
Value v;
if (parse(iter, end, square::peg::entry_point, v)) {
std::cout << "Result value: " << v << "\n";
} else {
std::cout << "Parsing failed\n";
}
Prints
====== " -89 0038 "
Parsed "-89 0038" as integer pair -89, 38
Result value: two_i(-89, 38)
====== " \"-89 0038\" "
Result value: quoted("-89 0038")
====== " something123123 ;"
Result value: bare(something123123)
====== ""
Expecting quoted string, bare string or integer number pair at
^
Parsing failed
====== " -89 oops "
Expecting integral number at
-89 oops
-------^
Expecting quoted string, bare string or integer number pair at
-89 oops
^
Parsing failed
====== " \"-89 0038 "
Expecting '"' at
"-89 0038
--------------^
Expecting quoted string, bare string or integer number pair at
"-89 0038
^
Parsing failed
====== " bareword "
Expecting ';' at
bareword
------------^
Expecting quoted string, bare string or integer number pair at
bareword
^
Parsing failed
====== " -89 3.14 "
Parsed "-89 3" as integer pair -89, 3
Expecting eoi at
-89 3.14
--------^
Parsing failed
Really Overdoing Things
I don't know about you, but I hate doing side-effects, let alone printing to the console from a parser. Let's use x3::with
instead.
We want to append to the diagnostics via the Ctx&
argument instead of writing
to std::cout
in the on_error
handler:
/// Base for rule ID tags. Instead of printing, on_error appends a formatted
/// diagnostic to the container stored in the parse context under `diags`.
struct with_error_handling {
    /// Context tag under which the diagnostics container is looked up.
    struct diags;

    /// Error hook: formats "Expecting <what> at", the source text and a
    /// dashed marker line, stores the message, and makes the rule fail.
    template<typename It, typename Ctx>
    x3::error_handler_result on_error(It f, It l, expectation_failure<It> const& ef, Ctx const& ctx) const {
        std::string const source(f, l);
        auto const offset = std::distance(f, ef.where());

        std::ostringstream message;
        message << "Expecting " << ef.which() << " at ";
        message << "\n\t" << source;
        message << "\n\t" << std::setw(offset) << std::setfill('-') << "" << "^";

        x3::get<diags>(ctx).push_back(message.str());
        return error_handler_result::fail;
    }
};
And on the call site, we can pass the context:
std::vector<std::string> diags;
if (parse(iter, end, x3::with<D>(diags) [square::peg::entry_point], v)) {
std::cout << "Result value: " << v;
} else {
std::cout << "Parsing failed";
}
std::cout << " with " << diags.size() << " diagnostics messages: \n";
The full program also prints the diagnostics:
Live On Wandbox²
Full Listing
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
namespace x3 = boost::spirit::x3;
// Distinct string types so the variant can tell a quoted string from a bare word.
struct quoted : std::string {};
struct bare : std::string {};
// Attribute of the two-integer branch.
using two_i = std::pair<int, int>;
// Result of a parse: exactly one of the three alternatives.
using Value = boost::variant<quoted, bare, two_i>;
static inline std::ostream& operator<<(std::ostream& os, Value const& v) {
struct {
std::ostream& _os;
void operator()(quoted const& v) const { _os << "quoted(" << std::quoted(v) << ")"; }
void operator()(bare const& v) const { _os << "bare(" << v << ")"; }
void operator()(two_i const& v) const { _os << "two_i(" << v.first << ", " << v.second << ")"; }
} vis{os};
boost::apply_visitor(vis, v);
return os;
}
// Grammar plus error handling for the full example: a quoted string, a bare
// word followed by ';', or two integers, parsed into a Value.
namespace square::peg {
using namespace x3;
// Base for rule ID tags: rules whose ID derives from it record expectation
// failures as diagnostics and then fail like an ordinary mismatch.
struct with_error_handling {
// Context tag under which the diagnostics container is looked up.
struct diags;
// Error hook: formats "Expecting <what> at", the text of [f, l) and a dashed
// marker line ending in '^', appends the message to the container found in
// the context, and returns fail.
template<typename It, typename Ctx>
x3::error_handler_result on_error(It f, It l, expectation_failure<It> const& ef, Ctx const& ctx) const {
std::string s(f,l);
// Offset of the failure position from the start of the range.
auto pos = std::distance(f, ef.where());
std::ostringstream oss;
oss << "Expecting " << ef.which() << " at "
<< "\n\t" << s
<< "\n\t" << std::setw(pos) << std::setfill('-') << "" << "^";
x3::get<diags>(ctx).push_back(oss.str());
return error_handler_result::fail;
}
};
// as<T>(p): wraps parser `p` in a rule with attribute type T whose ID type
// derives from with_error_handling.
template <typename T = x3::unused_type> auto const as = [](auto p) {
struct _ : with_error_handling {};
return rule<_, T> {} = p;
};
// The three branches, each coerced to its AST type.
const auto quoted_string = as<quoted>(lexeme['"' > *(print - '"') > '"']);
const auto bare_string = as<bare>(lexeme[alpha > *alnum] > ';');
const auto two_ints = as<two_i>(int_ > int_);
const auto main = quoted_string | bare_string | two_ints;
// Type of `main` without cv-qualifiers; keys the get_info specialization below.
using main_type = std::remove_cv_t<decltype(main)>;
// Top-level parser: skips whitespace, expects one `main` match and then end
// of input, producing a Value.
const auto entry_point = skip(space)[ as<Value>(expect[main] > eoi) ];
} // namespace square::peg
// Descriptions reported by expectation failures for parsers that would
// otherwise have no readable name.
namespace boost::spirit::x3 {

    /// Description used when an integer (int_) was expected.
    template <> struct get_info<int_type> {
        using result_type = std::string;

        result_type operator()(int_type const&) const {
            return "integral number";
        }
    };

    /// Description used when the top-level alternative was expected.
    template <> struct get_info<square::peg::main_type> {
        using result_type = std::string;

        result_type operator()(square::peg::main_type const&) const {
            return "quoted string, bare string or integer number pair";
        }
    };

} // namespace boost::spirit::x3
int main() {
using It = std::string::const_iterator;
using D = square::peg::with_error_handling::diags;
for (std::string const input : {
" -89 0038 ",
" \"-89 0038\" ",
" something123123 ;",
// undecidable
"",
// violate expecations, no successful parse
" -89 oops ", // not an integer
" \"-89 0038 ", // missing "
" bareword ", // missing ;
// trailing debris, successful "main"
" -89 3.14 ", // followed by .14
})
{
std::cout << "====== " << std::quoted(input) << "\n";
It iter = input.begin(), end = input.end();
Value v;
std::vector<std::string> diags;
if (parse(iter, end, x3::with<D>(diags) [square::peg::entry_point], v)) {
std::cout << "Result value: " << v;
} else {
std::cout << "Parsing failed";
}
std::cout << " with " << diags.size() << " diagnostics messages: \n";
for(auto& msg: diags) {
std::cout << " - " << msg << "\n";
}
}
}
¹ you could use rules with their names instead, obviating this more complex trick
² on older versions of the library you may have to battle to get reference semantics on the with<>
data: Live On Coliru
x3::with(peak_ref) [....]
and report on that location in the on_error
handler. If you show a minimal example (without error handling) I could try my hand at that. – Blackheart