How to parse a symmetric quoted string using nom in Rust?

This would be my approach (using nom-5.1.1):

extern crate nom;

use nom::{
  IResult,
  multi::{count, fold_many0, many_till},
  bytes::complete::{tag, take},
  sequence::pair
};

/// Parses a string quoted symmetrically with `#` characters, such as
/// `"ABC"`, `#"ABC"#` or `##"ABC"##`.
///
/// The input must start with zero or more `#`, followed by `"`. The content
/// runs up to the first `"` that is followed by at least as many `#` as were
/// seen before the opening quote; exactly that many `#` are consumed.
///
/// On success returns the unconsumed rest of the input and the content
/// between the delimiters, borrowed from `input`. Returns a nom error if the
/// opening quote is missing or no matching closing delimiter is found.
fn quoted_str(input: &str) -> IResult<&str, &str> {

  // Count number of leading #
  let (remaining, hash_count) = fold_many0(tag("#"), 0, |acc, _| acc + 1)(input)?;

  // Match the opening "
  let (remaining, _) = tag("\"")(remaining)?;

  // Consume one character at a time until the closing " plus # (repeated hash_count times)
  let closing = pair(tag("\""), count(tag("#"), hash_count));
  let (remaining, _) = many_till(take(1u32), closing)(remaining)?;

  // Derive the content range in bytes from how much input was consumed.
  // many_till collects one slice per *character*, so the length of that
  // vector is a character count; using it as a byte length mis-slices (or
  // panics on a char boundary) as soon as the content is not pure ASCII.
  // The opening and closing delimiters are both hash_count + 1 bytes long.
  let delimiter_len = hash_count + 1;
  let content_end = input.len() - remaining.len() - delimiter_len;

  Ok((remaining, &input[delimiter_len .. content_end]))
}

#[test]
fn run_test() {
  // Each case is (input, expected unconsumed rest, expected quoted content).
  let cases = [
    // Plain delimiters with zero to three hashes
    ("\"ABC\"", "", "ABC"),
    ("#\"ABC\"#", "", "ABC"),
    ("##\"ABC\"##", "", "ABC"),
    ("###\"ABC\"###", "", "ABC"),

    // One hash: a bare quote is content, quote + hash closes
    ("#\"ABC\"XYZ\"#", "", "ABC\"XYZ"),
    ("#\"ABC\"#XYZ\"#", "XYZ\"#", "ABC"),
    ("#\"ABC\"##XYZ\"#", "#XYZ\"#", "ABC"),

    // Two hashes: quote + fewer hashes is content, surplus hashes are left over
    ("##\"ABC\"XYZ\"##", "", "ABC\"XYZ"),
    ("##\"ABC\"#XYZ\"##", "", "ABC\"#XYZ"),
    ("##\"ABC\"##XYZ\"##", "XYZ\"##", "ABC"),
    ("##\"ABC\"###XYZ\"##", "#XYZ\"##", "ABC"),

    // Trailing input after the closing delimiter is returned untouched
    ("\"ABC\"XYZ", "XYZ", "ABC"),
    ("#\"ABC\"#XYZ", "XYZ", "ABC"),
    ("##\"ABC\"##XYZ", "XYZ", "ABC"),
  ];

  for &(source, rest, content) in cases.iter() {
    assert_eq!(quoted_str(source), Ok((rest, content)));
  }
}

If performance is important to you, the implicit vector allocation in many_till could be avoided by writing a fold_many_till function based on the code for fold_many0 and many_till. It seems nom does not currently provide such a function.

Recommended topics

Hot tags