libfilezilla
xml.hpp
1 #ifndef LIBFILEZILLA_XML_HEADER
2 #define LIBFILEZILLA_XML_HEADER
3 
4 #include <functional>
5 #include <string>
6 #include <tuple>
7 #include <vector>
8 
9 #include "buffer.hpp"
10 #include "logger.hpp"
11 
12 namespace fz {
13 
14 namespace xml {
15 
/// Identifies the kind of event handed to a callback as its first argument
/// (see parser::callback_t and namespace_parser::raw_callback_t).
///
/// NOTE(review): the per-enumerator notes below are inferred from the
/// enumerator names and from the names of the parser's internal states; the
/// implementation is not part of this header, so confirm the exact meaning of
/// the path/name/value arguments for each event against the implementation.
17 enum class callback_event
18 {
    /// Presumably reported when an element is opened.
20  open,
21 
    /// Presumably reported when an element is closed.
23  close,
24 
    /// Presumably reported for each attribute of an element.
26  attribute,
27 
    /// Presumably reported for character data/content of an element.
30  value,
31 
    /// Presumably reported for <?...?> constructs (cf. parser states pi/pi_value).
33  parsing_instruction,
34 
    /// Presumably reported for a <!DOCTYPE ...> declaration.
35  doctype,
36 };
37 
/// A streaming XML parser.
///
/// Input is handed over through parse(); events are reported through a
/// user-supplied callback of type callback_t. The class is final and all
/// member functions are only declared here, their definitions live elsewhere.
///
/// NOTE(review): std::string_view and uint32_t are used below although this
/// header directly includes only <functional>, <string>, <tuple> and <vector>
/// plus two project headers; they are presumably provided transitively -
/// consider including <string_view> and <cstdint> explicitly.
47 class FZ_PUBLIC_SYMBOL parser final
48 {
49 public:
    /// Signature of the event callback.
    ///
    /// \param type  Kind of event, see callback_event.
    /// \param path  Presumably the path of the element the event belongs to - confirm.
    /// \param name  Presumably the element or attribute name - confirm.
    /// \param value Passed as rvalue reference, so the callee may move from it.
    /// \return bool; presumably returning false aborts parsing - confirm.
    ///
    /// NOTE(review): a `using` alias would be the more modern spelling of this typedef.
56  typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string && value)> callback_t;
57 
    /// Constructs a parser, optionally with an initial callback (copied or moved).
    ///
    /// NOTE(review): the single-argument constructors are not `explicit`, so a
    /// callback_t converts implicitly to a parser; confirm this is intended.
58  parser();
59  parser(callback_t const& cb);
60  parser(callback_t && cb);
61 
    /// Sets the event callback, either by moving or by copying \c cb.
64  void set_callback(callback_t && cb);
65  void set_callback(callback_t const& cb);
66 
    /// Feeds \c data to the parser.
    ///
    /// \return bool; presumably false on error (see get_error()) - confirm.
    /// Presumably meant to be called repeatedly with consecutive chunks of the
    /// document, given that the class is described as a streaming parser.
68  bool parse(std::string_view data);
69 
    /// Presumably to be called once after the last chunk has been passed to
    /// parse() to check that the document is complete - confirm.
72  bool finalize();
73 
    /// Returns an error description as a std::string by value; presumably the
    /// message last recorded through set_error().
75  std::string get_error() const;
76 
77 private:
    // Internal helpers; their definitions are not part of this header.
78  bool decode_ref();
79  bool is_valid_tag_or_attr(std::string_view s) const;
80  bool normalize_value();
81 
82  bool parse_valid_utf8(std::string_view data);
    // Private overload working on a raw [begin, end) character range.
83  bool parse(char const* const begin, char const* const end);
84  void set_error(std::string_view msg, size_t offset);
85 
    // Takes the view by non-const reference, so it may modify the caller's view;
    // presumably to skip a byte order mark - confirm.
86  bool deduce_encoding(std::string_view & data);
87 
    // States of the parser's state machine; the current state is held in s_.
88  enum class state {
89  content,
90  tag_start, // Just after reading <
91  tag_name, // Reading tag name
92  tag_closing, // In a closing tag, matching the tag name
93  tag_end, // Just before reading >
94 
95  attributes,
96  attribute_quote,
97  attribute_value,
98 
99  // <?xml and other parsing instructions
100  pi,
101  pi_value,
102 
103  // entity and character references
104  reference,
105  attrvalue_reference,
106 
107  comment_start,
108  comment_end,
109 
110  doctype_start,
111  doctype_name,
112  doctype_value,
113 
114  cdata_start,
115  cdata_end,
116 
117  done,
118  error
119  };
120 
    // The user-supplied event callback.
121  callback_t cb_;
122 
    // Working data of the parser. NOTE(review): exact roles are not visible in
    // this header; presumably path_ is the current element path, nodes_ holds
    // offsets into path_ for the open elements, value_ accumulates the current
    // value, processed_ counts consumed input and converted_ buffers input
    // converted from UTF-16 - confirm.
123  std::string path_;
124  std::vector<size_t> nodes_;
125  std::string value_;
126  size_t processed_{};
127  std::string converted_{};
128 
    // Anonymous union: both decoder states share the same storage, so only one
    // of them is meaningful at a time (presumably selected by encoding_).
    // The first member is zero-initialized.
129  union {
130  size_t utf8_state_{};
131  uint32_t utf16_state_;
132  };
133 
    // Current state; a new parser starts out in state::content.
134  state s_{ state::content };
135 
    // Character encoding of the input.
136  enum class encoding {
137  unknown,
138  utf8,
139  utf16le,
140  utf16be
141  };
    // Value-initialized, i.e. starts as encoding::unknown.
142  encoding encoding_{};
143 
    // Anonymous union of per-state scratch values sharing the same storage;
    // presumably each is only used in the states its name relates to.
    // The first member is zero-initialized.
144  union {
145  size_t tag_match_pos_{};
146  char quotes_;
147  unsigned char dashes_;
148  };
149 
    // Flags, all initially false; presumably record whether the XML
    // declaration, a doctype and the first element have been seen.
150  bool got_xmldecl_{};
151  bool got_doctype_{};
152  bool got_element_{};
153 };
154 
/// A streamable XML parser that resolves namespace declarations and namespace
/// prefixes.
///
/// Holds a plain fz::xml::parser as a member (parser_) and exposes the same
/// parse()/finalize()/get_error() trio, plus an additional raw callback.
///
/// NOTE(review): no constructor declaration is visible in this listing and the
/// embedded listing numbers jump from 169 to 173 right after `public:`;
/// confirm against the real header whether constructor declarations were lost.
167 class FZ_PUBLIC_SYMBOL namespace_parser final
168 {
169 public:
173 
    /// Sets the event callback, either by moving or by copying \c cb.
    /// Same callback type as used by the plain parser.
174  void set_callback(parser::callback_t && cb);
175  void set_callback(parser::callback_t const& cb);
176 
    /// Same signatures as parser::parse() and parser::finalize(); presumably
    /// forwarded to the contained parser_ - confirm.
177  bool parse(std::string_view data);
178  bool finalize();
179 
    /// Returns an error description as a std::string by value.
180  std::string get_error() const;
181 
    /// Additional raw callback to look at events before namespace processing
    /// takes place. Unlike parser::callback_t, the value is passed as a
    /// std::string_view, so the callee cannot move from it.
183  typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t;
    /// Sets the raw callback, either by moving or by copying \c cb.
184  void set_raw_callback(raw_callback_t && cb);
185  void set_raw_callback(raw_callback_t const& cb);
186 private:
    // Internal helpers; their definitions are not part of this header.
187  std::string_view apply_namespaces(std::string_view in);
188  bool apply_namespace_to_path();
189 
    // Same parameter list and return type as parser::callback_t; presumably
    // installed as the callback of parser_ - confirm.
190  bool on_callback(callback_event type, std::string_view path, std::string_view name, std::string && value);
191 
    // The underlying parser.
192  parser parser_;
193 
    // User-supplied callbacks, see set_callback() and set_raw_callback().
194  parser::callback_t cb_;
195  raw_callback_t raw_cb_;
196 
    // Working data. NOTE(review): exact roles are not visible in this header;
    // presumably attributes_ collects (name, value) pairs of the current
    // element and namespaces_ the namespace declarations in scope - confirm.
197  std::string path_;
198  fz::buffer applied_;
199  std::vector<size_t> nodes_;
200  std::vector<std::pair<std::string, std::string>> attributes_;
201  std::vector<std::tuple<size_t, std::string, std::string>> namespaces_;
    // Both flags are initially false.
202  bool needs_namespace_expansion_{};
203  bool error_{};
204 };
205 
/// A slow pretty printer for XML as it is being parsed.
///
/// Abstract base class: events are passed in through log(), derived classes
/// receive output through the pure virtual on_line().
207 class FZ_PUBLIC_SYMBOL pretty_printer
208 {
209 public:
210  pretty_printer() = default;
    /// Virtual destructor; declared here, defined elsewhere.
211  virtual ~pretty_printer();
212 
    /// Takes one parser event.
    ///
    /// The parameter list mirrors namespace_parser::raw_callback_t, but the
    /// function returns void and the second parameter (the path in the callback
    /// signatures) is unnamed in this declaration.
213  void log(callback_event type, std::string_view, std::string_view name, std::string_view value);
214 
215 protected:
    /// To be implemented by derived classes; presumably called once per
    /// finished line of output - confirm.
216  virtual void on_line(std::string_view line) = 0;
217 
218 private:
    // Internal helpers; their definitions are not part of this header.
219  void finish_line();
220  void print_line();
221 
    // Starts at 0; presumably the current nesting depth used for indentation.
222  size_t depth_{};
223  std::string value_;
224  std::string line_;
225 };
226 
/// A pretty_printer that overrides on_line() and carries a logger reference
/// and a log message type; presumably each line is forwarded to logger_ with
/// type level_ - confirm against the implementation.
///
/// NOTE(review): logger_ is a reference member and therefore has to be bound
/// by a constructor, yet no constructor is visible in this listing (the
/// embedded listing numbers skip 231 right after `public:`); confirm against
/// the real header whether the constructor declaration was lost.
228 class FZ_PUBLIC_SYMBOL pretty_logger : public pretty_printer
229 {
230 public:
232 
233 protected:
    // NOTE(review): `virtual` is redundant next to `override`.
234  virtual void on_line(std::string_view line) override;
235 
    // Both members are protected, not private, so derived classes can access them.
236  logmsg::type level_;
237  logger_interface & logger_;
238 };
239 
240 
241 }
242 }
243 
244 #endif
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:228
A streaming XML parser.
Definition: xml.hpp:47
A streamable XML parser that resolves namespace declarations and namespace prefixes.
Definition: xml.hpp:167
Interface for logging.
type
Definition: logger.hpp:15
The namespace used by libfilezilla.
Definition: apply.hpp:17
Declares fz::buffer.
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:207
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string &&value)> callback_t
Definition: xml.hpp:56
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t
Additional raw callback to look at events before namespace processing takes place.
Definition: xml.hpp:183
The buffer class is a simple buffer where data can be appended at the end and consumed at the front...
Definition: buffer.hpp:26
Abstract interface for logging strings.
Definition: logger.hpp:50