If you really want to do this the "nice" way, you'd have to hack it into extract_int
in numeric_utils.hpp.
Even better, you'd want to make it a strategy class, much like the real_policies used by real_parser, because just mixing more branches into the existing general-purpose integer handling code complicates that code and has the potential to slow down any integer parsing.
I have not done this. However, I do have a proof-of-concept approach here:
Mind you, this is not well tested and not fit for serious use for the reasons stated, but you can use it as inspiration. You might want to just duplicate the uint_parser directive as a whole and stick it in your Spirit Repository location.
The patch
It's relatively straightforward. If you define ALLOW_SO_UNDERSCORE_HACK
you will get the bypass for underscore inserted into the loop unrolling macros:
// Opt-in digit-separator hack. When ALLOW_SO_UNDERSCORE_HACK is defined, the
// macro expands to a check that steps the iterator `it` past a '_' and then
// restarts the enclosing loop with `continue`. It dereferences `it` without
// testing for end-of-input, and it relies on `continue`, so it may only be
// expanded inside a loop body at a point where `it` is known to be valid.
#if defined(ALLOW_SO_UNDERSCORE_HACK)
# define SPIRIT_SO_SKIP_UNDERSCORE_HACK() \
if ('_' == *it) { \
++it; \
continue; \
}
#else
// Hack disabled: the macro expands to nothing.
# define SPIRIT_SO_SKIP_UNDERSCORE_HACK()
#endif
The only real complexity there is from "seeing through" the optimizations made in that translation unit.
There's a rather arbitrary choice to (dis)allow underscores among the leading zeros. I have opted to allow them:
#if defined(ALLOW_SO_UNDERSCORE_HACK)
// skip leading zeros, also stepping over any underscores mixed in with them
for(;it != last;++it) {
// A zero is consumed and counted, but only while the count is below MaxDigits.
if ('0' == *it && leading_zeros < MaxDigits) {
++leading_zeros;
continue;
} else if ('_' == *it) {
// An underscore is consumed (by the loop's ++it) without being counted.
continue;
}
// Anything else, including a zero once the limit is reached, ends the scan
// with `it` left pointing at that character.
break;
}
#else
Finally, underscores are not counted towards the MinDigits and MaxDigits limits.
DEMO
The following test program demonstrates things. Note the reordering of branches.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// Grammar yielding a uint64_t from a number whose radix is chosen by a
/// trailing letter: h/H/x/X for hexadecimal, q/Q/o/O for octal, b/B/y/Y for
/// binary, and an optional d/D/t/T for decimal.
template <typename Iterator>
struct unsigned_parser : qi::grammar<Iterator, uint64_t()> {
    unsigned_parser() : unsigned_parser::base_type(start) {
        // One numeric parser per supported radix.
        qi::uint_parser<uint64_t, 16> hexadecimal;
        qi::uint_parser<uint64_t, 8>  octal;
        qi::uint_parser<uint64_t, 2>  binary;
        qi::uint_parser<uint64_t, 10> decimal;

        // eps(false) never matches; it is only there so that every real
        // alternative below can start with '|'. The order of the alternatives
        // is significant: hexadecimal is listed first because 'b' and 'd' are
        // themselves hexadecimal digits.
        start = qi::eps(false)
              | (hexadecimal >> qi::omit[ qi::char_("hHxX")])   // suffix required
              | (octal       >> qi::omit[ qi::char_("qQoO")])   // suffix required
              | (binary      >> qi::omit[ qi::char_("bByY")])   // suffix required
              | (decimal     >> qi::omit[-qi::char_("dDtT")])   // suffix optional
              ;
    }

    qi::rule<Iterator, uint64_t()> start;
};
int main(int argv, const char *argc[]) {
typedef std::string::const_iterator iter;
unsigned_parser<iter> up;
for (auto const& test : std::vector<std::string>(argc+1, argc+argv)) {
iter i = test.begin(), end = test.end();
uint64_t val;
bool rv = parse(i, end, up, val);
std::cout << (rv?"Successful":"Failed") << " parse: '" << test << "' -> " << val << "\n";
if (i != end)
std::cout << " ** Remaining unparsed: '" << std::string(i,end) << "'\n";
}
}
If you call it with command line arguments 123_456 123456 1_bh 0_010Q 1010_1010_0111_0111_b
it will print:
Successful parse: '123_456' -> 123456
Successful parse: '123456' -> 123456
Successful parse: '1_bh' -> 27
Successful parse: '0_010Q' -> 8
Successful parse: '1010_1010_0111_0111_b' -> 43639
LISTING
Full patch (on the boost-1.57.0 tag) for preservation on SO:
commit 24b16304f436bfd0f6e2041b2b7be0c8677c7e75
Author: Seth Heeren <[email protected]>
Date: Thu Mar 19 01:44:55 2015 +0100
https://mcmap.net/q/1481920/-using-boost-spirit-qi-to-parse-numbers-with-separators
rough patch for exposition of my answer only
diff --git a/include/boost/spirit/home/qi/numeric/detail/numeric_utils.hpp b/include/boost/spirit/home/qi/numeric/detail/numeric_utils.hpp
index 5137f87..1ced164 100644
--- a/include/boost/spirit/home/qi/numeric/detail/numeric_utils.hpp
+++ b/include/boost/spirit/home/qi/numeric/detail/numeric_utils.hpp
@@ -262,10 +262,21 @@ namespace boost { namespace spirit { namespace qi { namespace detail
///////////////////////////////////////////////////////////////////////////
// extract_int: main code for extracting integers
///////////////////////////////////////////////////////////////////////////
+#if defined(ALLOW_SO_UNDERSCORE_HACK)
+# define SPIRIT_SO_SKIP_UNDERSCORE_HACK() \
+ if ('_' == *it) { \
+ ++it; \
+ continue; \
+ }
+#else
+# define SPIRIT_SO_SKIP_UNDERSCORE_HACK()
+#endif
+
#define SPIRIT_NUMERIC_INNER_LOOP(z, x, data) \
if (!check_max_digits<MaxDigits>::call(count + leading_zeros) \
|| it == last) \
break; \
+ SPIRIT_SO_SKIP_UNDERSCORE_HACK() \
ch = *it; \
if (!radix_check::is_valid(ch) || !extractor::call(ch, count, val)) \
break; \
@@ -301,12 +312,25 @@ namespace boost { namespace spirit { namespace qi { namespace detail
std::size_t leading_zeros = 0;
if (!Accumulate)
{
+#if defined(ALLOW_SO_UNDERSCORE_HACK)
+ // skip leading zeros
+ for(;it != last;++it) {
+ if ('0' == *it && leading_zeros < MaxDigits) {
+ ++leading_zeros;
+ continue;
+ } else if ('_' == *it) {
+ continue;
+ }
+ break;
+ }
+#else
// skip leading zeros
while (it != last && *it == '0' && leading_zeros < MaxDigits)
{
++it;
++leading_zeros;
}
+#endif
}
typedef typename
@@ -366,6 +390,7 @@ namespace boost { namespace spirit { namespace qi { namespace detail
#define SPIRIT_NUMERIC_INNER_LOOP(z, x, data) \
if (it == last) \
break; \
+ SPIRIT_SO_SKIP_UNDERSCORE_HACK() \
ch = *it; \
if (!radix_check::is_valid(ch)) \
break; \
@@ -399,12 +424,25 @@ namespace boost { namespace spirit { namespace qi { namespace detail
std::size_t count = 0;
if (!Accumulate)
{
+#if defined(ALLOW_SO_UNDERSCORE_HACK)
+ // skip leading zeros
+ for(;it != last;++it) {
+ if ('0' == *it) {
+ ++count;
+ continue;
+ } else if ('_' == *it) {
+ continue;
+ }
+ break;
+ }
+#else
// skip leading zeros
while (it != last && *it == '0')
{
++it;
++count;
}
+#endif
if (it == last)
{
@@ -472,6 +510,7 @@ namespace boost { namespace spirit { namespace qi { namespace detail
};
#undef SPIRIT_NUMERIC_INNER_LOOP
+#undef SPIRIT_SO_SKIP_UNDERSCORE_HACK
///////////////////////////////////////////////////////////////////////////
// Cast an signed integer to an unsigned integer
BOOST_PHOENIX_ADAPT_FUNCTION
Live On Coliru – Crinite