使用 boost::spirit，如何要求记录的一部分独占一行？

发布于 2024-08-24 09:56:38 字数 5205 浏览 19 评论 0 原文

我有一个记录解析器，它抛出几个异常之一来指示哪个规则失败。

前言：

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
using namespace boost::spirit::qi;
using namespace boost::spirit::qi::labels;

using boost::phoenix::function;
using boost::phoenix::ref;
using boost::spirit::qi::eol;
using boost::spirit::qi::fail;
using boost::spirit::qi::lit;
using boost::spirit::qi::on_error;

using BOOST_SPIRIT_CLASSIC_NS::file_position;
using BOOST_SPIRIT_CLASSIC_NS::position_iterator;

我们使用来自 Spirit.Classic 的 position_iterator，所以下面的流插入运算符很方便。

std::ostream&
operator<<(std::ostream& o, const file_position &fp)
{
  o << fp.file << ": " << fp.line << ',' << fp.column;
  return o;
}

模板 err_t 把抛出各类解析失败所对应异常的样板代码提取了出来。

// Functor that reports a parse failure by throwing Exception.
// The exception message has the form
//   <position>: expecting <what> near '<rest of input>'\n
template <typename Exception>
struct err_t {
  // Result type for a three-argument call: the functor returns nothing.
  template <typename, typename, typename>
  struct result { typedef void type; };

  // what   - description of what the parser expected
  // errPos - iterator at the failure point; must provide get_position()
  // last   - end of input; the range [errPos, last) is quoted in the message
  // Always throws Exception built from the formatted message.
  template <typename Iterator>
  void operator() (info const &what, Iterator errPos, Iterator last) const
  {
    const std::string remaining(errPos, last);

    std::ostringstream message;
    message << errPos.get_position();
    message << ": expecting " << what;
    message << " near '" << remaining << "'\n";

    throw Exception(message.str());
  }
};

用到的异常类型及其对应的 err_t 包装器：

// Exception type carrying the diagnostic message for a failure tagged "A".
class MissingA : public std::runtime_error {
public:
  MissingA(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Exception type carrying the diagnostic message for a failure tagged "B".
class MissingB : public std::runtime_error {
public:
  MissingB(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Exception type carrying the diagnostic message for a failure tagged "C".
class MissingC : public std::runtime_error {
public:
  MissingC(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Phoenix lazy-function wrappers around err_t, one per exception type, so
// they can be invoked with placeholders inside on_error handlers.
const function<err_t<MissingA> > missingA = err_t<MissingA>();
const function<err_t<MissingB> > missingB = err_t<MissingB>();
const function<err_t<MissingC> > missingC = err_t<MissingC>();
const function<err_t<std::runtime_error> > other_error = err_t<std::runtime_error>();

该语法匹配简单的序列。如果没有 eps，那么在空输入上失败的将是 start 规则，而不是 a 规则。

// Grammar for a three-part record: "Header A", "Header B", then "C:"
// followed by an integer, each part terminated by eol.
//
// Iterator - input iterator type; the error handlers call get_position()
//            on it, so it must be a position-tracking iterator.
// Skipper  - skip-parser type shared by all rules.
//
// NOTE(review): if Skipper treats '\n' as skippable whitespace, the pre-skip
// performed before eol may consume the newline so that eol cannot match.
// Confirm against the skipper actually passed to phrase_parse.
template <typename Iterator, typename Skipper>
struct my_grammar
  : grammar<Iterator, Skipper>
{
  // result - receives the integer parsed by rule c; stored by reference,
  //          so it must outlive the grammar.
  my_grammar(int &result)
    : my_grammar::base_type(start)
    , result(result)
  {
    // Each rule starts with eps so that every real component sits behind an
    // expectation operator (>) and a mismatch is reported through that
    // rule's own error handler.
    a = eps > lit("Header A") > eol;
    b = eps > lit("Header B") > eol;
    // The semantic action writes the parsed integer into result.
    c = eps > lit("C:") > int_[ref(result) = _1] > eol;
    start = a > b > c;

    // Names assigned to the rules for diagnostics.
    a.name("A");
    b.name("B");
    c.name("C");

    // One handler per rule. The placeholders are passed in the order
    // err_t::operator() takes them: what, error position, end of input.
    on_error<fail>(start, other_error(_4, _3, _2));
    on_error<fail>(a, missingA(_4, _3, _2));
    on_error<fail>(b, missingB(_4, _3, _2));
    on_error<fail>(c, missingC(_4, _3, _2));
  }

  rule<Iterator, Skipper> start;  // entry rule: a, then b, then c
  rule<Iterator, Skipper> a;      // "Header A" line
  rule<Iterator, Skipper> b;      // "Header B" line
  rule<Iterator, Skipper> c;      // "C:" <int> line
  int &result;                    // destination for the integer parsed by c
};

在 my_parse 中，我们将流的内容转储到 std::string 中，并使用 position_iterator 来跟踪解析的位置。

int
my_parse(const std::string &path, std::istream &is)
{
  std::string buf;
  is.unsetf(std::ios::skipws);
  std::copy(std::istream_iterator<char>(is),
            std::istream_iterator<char>(),
            std::back_inserter(buf));

  typedef position_iterator<std::string::const_iterator> itertype;
  typedef my_grammar<itertype, boost::spirit::ascii::space_type> grammar;
  itertype it(buf.begin(), buf.end(), path);
  itertype end;

  int result;
  grammar g(result);

  bool r = phrase_parse(it, end, g, boost::spirit::ascii::space);
  if (r && it == end) {
    std::cerr << "success!\n";
    return result;
  }
  else {
    file_position fpos = it.get_position();
    std::cerr << "parse failed at " << fpos << '\n';
    return -9999;
  }
}

最后，主程序

// Feeds a fixed three-line sample record to my_parse and prints the result.
int main()
{
  std::stringstream input;
  input << "Header A\n";
  input << "Header B\n";
  input << "C: 3\n";

  const int parsed = my_parse("path", input);
  std::cout << "val = " << parsed << '\n';

  return 0;
}

上面的代码抛出MissingA：

terminate called after throwing an instance of 'MissingA'
  what():  path: 2,1: expecting  near 'Header B
C: 3
'

我原以为是跳过器（skipper）把换行符消耗掉了，但改用 lexeme[eol] 之后结果仍然相同。

我一定错过了一些明显的东西，因为这似乎是最简单的解析器之一。我做错了什么？

原文

I have a record parser that throws one of several exceptions to indicate which rule failed.

Front matter:

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
using namespace boost::spirit::qi;
using namespace boost::spirit::qi::labels;

using boost::phoenix::function;
using boost::phoenix::ref;
using boost::spirit::qi::eol;
using boost::spirit::qi::fail;
using boost::spirit::qi::lit;
using boost::spirit::qi::on_error;

using BOOST_SPIRIT_CLASSIC_NS::file_position;
using BOOST_SPIRIT_CLASSIC_NS::position_iterator;

We use the position_iterator from Spirit.Classic, so the following stream-insertion operator is handy.

std::ostream&
operator<<(std::ostream& o, const file_position &fp)
{
  o << fp.file << ": " << fp.line << ',' << fp.column;
  return o;
}

The template err_t factors out the boilerplate for throwing the exceptions associated with different forms of parse failure.

// Functor that reports a parse failure by throwing Exception.
// The exception message has the form
//   <position>: expecting <what> near '<rest of input>'\n
template <typename Exception>
struct err_t {
  // Result type for a three-argument call: the functor returns nothing.
  template <typename, typename, typename>
  struct result { typedef void type; };

  // what   - description of what the parser expected
  // errPos - iterator at the failure point; must provide get_position()
  // last   - end of input; the range [errPos, last) is quoted in the message
  // Always throws Exception built from the formatted message.
  template <typename Iterator>
  void operator() (info const &what, Iterator errPos, Iterator last) const
  {
    const std::string remaining(errPos, last);

    std::ostringstream message;
    message << errPos.get_position();
    message << ": expecting " << what;
    message << " near '" << remaining << "'\n";

    throw Exception(message.str());
  }
};

The exceptions used along with their err_t wrappers:

// Exception type carrying the diagnostic message for a failure tagged "A".
class MissingA : public std::runtime_error {
public:
  MissingA(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Exception type carrying the diagnostic message for a failure tagged "B".
class MissingB : public std::runtime_error {
public:
  MissingB(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Exception type carrying the diagnostic message for a failure tagged "C".
class MissingC : public std::runtime_error {
public:
  MissingC(const std::string &message)
    : std::runtime_error(message)
  {}
};

// Phoenix lazy-function wrappers around err_t, one per exception type, so
// they can be invoked with placeholders inside on_error handlers.
const function<err_t<MissingA> > missingA = err_t<MissingA>();
const function<err_t<MissingB> > missingB = err_t<MissingB>();
const function<err_t<MissingC> > missingC = err_t<MissingC>();
const function<err_t<std::runtime_error> > other_error = err_t<std::runtime_error>();

The grammar looks for simple sequences. Without eps, the start rule fails rather than a on an empty input.

// Grammar for a three-part record: "Header A", "Header B", then "C:"
// followed by an integer, each part terminated by eol.
//
// Iterator - input iterator type; the error handlers call get_position()
//            on it, so it must be a position-tracking iterator.
// Skipper  - skip-parser type shared by all rules.
//
// NOTE(review): if Skipper treats '\n' as skippable whitespace, the pre-skip
// performed before eol may consume the newline so that eol cannot match.
// Confirm against the skipper actually passed to phrase_parse.
template <typename Iterator, typename Skipper>
struct my_grammar
  : grammar<Iterator, Skipper>
{
  // result - receives the integer parsed by rule c; stored by reference,
  //          so it must outlive the grammar.
  my_grammar(int &result)
    : my_grammar::base_type(start)
    , result(result)
  {
    // Each rule starts with eps so that every real component sits behind an
    // expectation operator (>) and a mismatch is reported through that
    // rule's own error handler.
    a = eps > lit("Header A") > eol;
    b = eps > lit("Header B") > eol;
    // The semantic action writes the parsed integer into result.
    c = eps > lit("C:") > int_[ref(result) = _1] > eol;
    start = a > b > c;

    // Names assigned to the rules for diagnostics.
    a.name("A");
    b.name("B");
    c.name("C");

    // One handler per rule. The placeholders are passed in the order
    // err_t::operator() takes them: what, error position, end of input.
    on_error<fail>(start, other_error(_4, _3, _2));
    on_error<fail>(a, missingA(_4, _3, _2));
    on_error<fail>(b, missingB(_4, _3, _2));
    on_error<fail>(c, missingC(_4, _3, _2));
  }

  rule<Iterator, Skipper> start;  // entry rule: a, then b, then c
  rule<Iterator, Skipper> a;      // "Header A" line
  rule<Iterator, Skipper> b;      // "Header B" line
  rule<Iterator, Skipper> c;      // "C:" <int> line
  int &result;                    // destination for the integer parsed by c
};

In my_parse, we dump the contents of the stream into a std::string and use position_iterator to track the parse's location.

int
my_parse(const std::string &path, std::istream &is)
{
  std::string buf;
  is.unsetf(std::ios::skipws);
  std::copy(std::istream_iterator<char>(is),
            std::istream_iterator<char>(),
            std::back_inserter(buf));

  typedef position_iterator<std::string::const_iterator> itertype;
  typedef my_grammar<itertype, boost::spirit::ascii::space_type> grammar;
  itertype it(buf.begin(), buf.end(), path);
  itertype end;

  int result;
  grammar g(result);

  bool r = phrase_parse(it, end, g, boost::spirit::ascii::space);
  if (r && it == end) {
    std::cerr << "success!\n";
    return result;
  }
  else {
    file_position fpos = it.get_position();
    std::cerr << "parse failed at " << fpos << '\n';
    return -9999;
  }
}

Finally, the main program

// Feeds a fixed three-line sample record to my_parse and prints the result.
int main()
{
  std::stringstream input;
  input << "Header A\n";
  input << "Header B\n";
  input << "C: 3\n";

  const int parsed = my_parse("path", input);
  std::cout << "val = " << parsed << '\n';

  return 0;
}

The code above throws MissingA:

terminate called after throwing an instance of 'MissingA'
  what():  path: 2,1: expecting  near 'Header B
C: 3
'

I thought the skipper might have consumed the newline, but attempting lexeme[eol] instead produced the same result.

I must be missing something obvious, because this seems to be one of the most trivial sorts of parsers to write. What am I doing wrong?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

万劫不复 2024-08-31 09:56:38

是的，跳过器（skipper）吃掉了换行符。lexeme[eol] 也无济于事，因为 lexeme 指令在切换到不跳过模式之前会先调用一次跳过器（请参阅此处了解更多详细信息）。

为了避免跳过换行符，请使用不同的跳过器类型，或者将 eol 组件包装到 no_skip[eol] 中；它在语义上等同于 lexeme[]，只是不会预先调用跳过器。但请注意，no_skip[] 是最近才添加的，因此要到下一个版本（Boost V1.43）才可用。不过它已经在 Boost SVN 中了（参见此处获取初步文档）。