Hermes
Loading...
Searching...
No Matches
parsers.h
1
27
28#ifndef HERMES_HERMES_COMMON_PARSERS_H
29#define HERMES_HERMES_COMMON_PARSERS_H
30
#include <cstddef>
#include <functional>
#include <iosfwd>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <hermes/common/result.h>
36
37namespace hermes {
38
39// *********************************************************************************************************************
40// ParseTree
41// *********************************************************************************************************************
/// \brief Represents a parsing tree (or derivation tree) generated by a parser.
/// \note A ParseTree can only be constructed by StringParser (friend class):
///       the sole constructor is private.
class ParseTree {
  friend class StringParser;

public:
  /// \brief Parse tree nodes can be of different types.
  enum class NodeType {
    INPUT = 0, //!< a piece of text which might contain other nodes
    TOKEN = 1, //!< a single token (a word identified by the parser)
    BLOCK = 2  //!< a block of text enclosed by block delimiters which might
               //!< contain other nodes
  };
  // *******************************************************************************************************************
  // ParseTree::Node
  // *******************************************************************************************************************
  /// \brief A node represents an element in the parsed string.
  struct Node {
    std::string name;
    std::string value;
    // NOTE(review): presumably the offset of this element in the parsed
    // string and its length in characters -- confirm against parsers.cpp.
    size_t start{};
    size_t size{};
    //! Block id of the parser's block list (root is -1). Value-initialized to
    //! 0 here, so -1 must be assigned explicitly by whoever builds the root.
    int block_id{};
    std::vector<Node> children;
  };
  // *******************************************************************************************************************
  // METHODS
  // *******************************************************************************************************************
  /// \brief Invokes the callback on nodes of the tree.
  /// \note NOTE(review): traversal order and the meaning of the callback's
  ///       boolean return are defined in parsers.cpp -- confirm there.
  /// \param callback function receiving a read-only reference to a node
  void iterate(const std::function<bool(const Node &)> &callback) const;
  // *******************************************************************************************************************
  // DEBUG
  // *******************************************************************************************************************
  /// \brief Dumps all content of the tree.
  /// \param os output stream
  /// \param parse_tree tree to be dumped
  /// \return the output stream reference
  friend std::ostream &operator<<(std::ostream &os, const ParseTree &parse_tree);

private:
  /// \brief Gets the enumerator name of a node type.
  /// \param type node type
  /// \return "INPUT", "TOKEN" or "BLOCK"; "INVALID NODE TYPE" for any value
  ///         outside the enumeration
  static std::string typeName(NodeType type) {
    switch (type) {
    case NodeType::INPUT:
      return "INPUT";
    case NodeType::TOKEN:
      return "TOKEN";
    case NodeType::BLOCK:
      return "BLOCK";
    }
    // reached only when `type` holds a value that is not a declared enumerator
    return "INVALID NODE TYPE";
  }
  /// \brief Builds a tree taking ownership of the given root node.
  /// \param node root node (moved into the tree)
  explicit ParseTree(Node node) : root_(std::move(node)) {}

  Node root_;
};
104
105// *********************************************************************************************************************
106// StringParser
107// *********************************************************************************************************************
110public:
111 // *******************************************************************************************************************
112 // STATIC METHODS
113 // *******************************************************************************************************************
114 // construction
117 static StringParser cLanguage();
118 // parsing
124 static size_t matchPrefixWithAny(const std::string &characters, const std::string &s, size_t start = 0);
133 static size_t matchPrefixWithAny(const std::vector<std::pair<std::string, std::string>> &block_delimiters,
134 const std::string &s,
135 size_t &block_content_start,
136 size_t &block_content_size,
137 size_t start = 0);
145 static size_t matchPrefixWithAny(const std::vector<std::string> &block_openings,
146 const std::string &s,
147 int &match_id,
148 size_t start = 0);
154 static size_t startsWith(const std::vector<std::pair<std::string, std::string>> &block_delimiters,
155 const std::string &s,
156 size_t i = 0);
157 // *******************************************************************************************************************
158 // CONSTRUCTORS
159 // *******************************************************************************************************************
160 StringParser();
161 virtual ~StringParser();
162 // *******************************************************************************************************************
163 // RULES
164 // *******************************************************************************************************************
165 // empty / blank characters
171 void setBlankCharacters(const std::string &blank_characters);
176 void pushBlankCharacter(char blank_character);
182 void pushBlankDelimiters(const std::string &open_pattern, const std::string &close_pattern);
183 // enclosing blocks
189 void pushBlockDelimiters(const std::string &open_pattern, const std::string &close_pattern);
190 // tokens
194 void pushTokenPattern(const std::string &name, const std::string &regex_pattern);
195 // *******************************************************************************************************************
196 // PARSING
197 // *******************************************************************************************************************
202 Result<ParseTree> parse(const std::string &text, bool copy_string = true);
203private:
209 [[nodiscard]] size_t consumeAllBlanks(const std::string &s, size_t start = 0) const;
216 [[nodiscard]] size_t parseToken(const std::string &s, std::string &token_name, size_t start = 0) const;
217
218 std::string blank_characters_ = " \t\n";
219 std::unordered_map<std::string, std::string> token_patterns_;
221 std::vector<std::string> blank_block_openings_;
222 std::vector<std::string> blank_block_closings_;
224 std::vector<std::string> block_openings_;
225 std::vector<std::string> block_closings_;
226};
227
228}
229
230#endif //HERMES_HERMES_COMMON_PARSERS_H
The ParseTree represents a parsing tree (or derivation tree) generated by a parser....
Definition parsers.h:44
NodeType
Parse tree nodes can be of different types:
Definition parsers.h:51
@ BLOCK
a block of text enclosed by block delimiters which might contain other nodes
@ TOKEN
a single token (a word identified by the parser)
@ INPUT
a piece of text which might contain other nodes
friend std::ostream & operator<<(std::ostream &os, const ParseTree &parse_tree)
Dumps all content of the tree.
Definition parsers.cpp:47
General token parser for strings.
Definition parsers.h:109
void pushTokenPattern(const std::string &name, const std::string &regex_pattern)
Appends a token - pattern pair to the parser.
Definition parsers.cpp:110
static StringParser cLanguage()
A simple and rough parser for c-like languages (far from complete).
Definition parsers.cpp:73
static size_t startsWith(const std::vector< std::pair< std::string, std::string > > &block_delimiters, const std::string &s, size_t i=0)
Checks if string starts with a block delimiter (first element in pairs)
Definition parsers.cpp:194
Result< ParseTree > parse(const std::string &text, bool copy_string=true)
Parses a given string into a parsing tree.
Definition parsers.cpp:238
static size_t matchPrefixWithAny(const std::string &characters, const std::string &s, size_t start=0)
Matches the prefix (from start) of the string with any sequence in the given set of characters.
Definition parsers.cpp:114
Holds 2-dimensional integer index coordinates.
Definition index.h:50
A node represents an element in the parsed string.
Definition parsers.h:61
int block_id
block id of the parser's block list (root is -1)
Definition parsers.h:66