123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473 |
- // file : xml/parser -*- C++ -*-
- // copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC
- // license : MIT; see accompanying LICENSE file
- #ifndef XML_PARSER
- #define XML_PARSER
- #include <xml/details/pre.hxx>
- #include <map>
- #include <vector>
- #include <string>
- #include <iosfwd>
- #include <cstddef> // std::size_t
- #include <xml/details/config.hxx> // LIBSTUDXML_EXTERNAL_EXPAT
- #ifndef LIBSTUDXML_EXTERNAL_EXPAT
- # include <xml/details/expat/expat.h>
- #else
- # include <expat.h>
- #endif
- // We only support UTF-8 Expat.
- //
- #ifdef XML_UNICODE
- # error UTF-16 expat (XML_UNICODE defined) is not supported
- #endif
- #include <xml/forward>
- #include <xml/qname>
- #include <xml/content>
- #include <xml/exception>
- #include <xml/details/export.hxx>
- namespace xml
- {
- class parser;
- // Exception type used to report XML parsing errors. It stores the name
- // of the input, a line/column position, and a description, each of which
- // is returned by the accessor of the same name.
- //
- struct LIBSTUDXML_EXPORT parsing: exception
- {
-   virtual ~parsing () throw ();
-   // Construct from an explicitly supplied input name, position, and
-   // description.
-   //
-   parsing (const std::string& name,
-            unsigned long long line,
-            unsigned long long column,
-            const std::string& description);
-   // Construct from a parser and a description.
-   // NOTE(review): presumably the input name and position are taken from
-   // the parser; the definition is not visible in this header -- confirm.
-   //
-   parsing (const parser& p, const std::string& description);
-   // Accessors for the stored error details.
-   //
-   const std::string& name () const {return name_;}
-   unsigned long long line () const {return line_;}
-   unsigned long long column () const {return column_;}
-   const std::string& description () const {return description_;}
-   virtual const char* what () const throw ();
- private:
-   // NOTE(review): presumably composes what_ from the other members; it
-   // is defined outside of this header -- confirm.
-   //
-   void init ();
-   std::string name_;
-   unsigned long long line_;
-   unsigned long long column_;
-   std::string description_;
-   std::string what_;
- };
- // XML parser that reports the document as a sequence of events which
- // are retrieved one at a time with next() and peek(). It holds an Expat
- // XML_Parser handle and registers the static XMLCALL handlers declared
- // in the private section below.
- //
- class LIBSTUDXML_EXPORT parser
- {
- public:
-   typedef xml::qname qname_type;
-   typedef xml::content content_type;
-   typedef unsigned short feature_type;
-   // Feature flags (bit mask) that select what the parser reports.
-   //
-   // If both receive_attributes_event and receive_attributes_map are
-   // specified, then receive_attributes_event is assumed.
-   //
-   static const feature_type receive_elements = 0x0001;
-   static const feature_type receive_characters = 0x0002;
-   static const feature_type receive_attributes_map = 0x0004;
-   static const feature_type receive_attributes_event = 0x0008;
-   static const feature_type receive_namespace_decls = 0x0010;
-   static const feature_type receive_default = receive_elements |
-                                               receive_characters |
-                                               receive_attributes_map;
-   // Parse std::istream. Input name is used in diagnostics to identify
-   // the document being parsed.
-   //
-   // If stream exceptions are enabled then std::ios_base::failure
-   // exception is used to report io errors (badbit and failbit).
-   // Otherwise, those are reported as the parsing exception.
-   //
-   parser (std::istream&,
-           const std::string& input_name,
-           feature_type = receive_default);
-   // Parse memory buffer that contains the whole document. Input name
-   // is used in diagnostics to identify the document being parsed.
-   //
-   parser (const void* data,
-           std::size_t size,
-           const std::string& input_name,
-           feature_type = receive_default);
-   // Return the input name that was passed to the constructor.
-   //
-   const std::string&
-   input_name () const {return iname_;}
-   ~parser ();
- private:
-   // Copying is disabled: the copy constructor and copy assignment are
-   // declared private.
-   //
-   parser (const parser&);
-   parser& operator= (const parser&);
-   // Parsing events.
-   //
- public:
-   enum event_type
-   {
-     // If adding new events, also update the stream insertion operator.
-     //
-     start_element,
-     end_element,
-     start_attribute,
-     end_attribute,
-     characters,
-     start_namespace_decl,
-     end_namespace_decl,
-     eof
-   };
-   // Get the next event.
-   //
-   event_type
-   next ();
-   // Get the next event and make sure that it's what's expected. If it
-   // is not, then throw an appropriate parsing exception.
-   //
-   void
-   next_expect (event_type);
-   void
-   next_expect (event_type, const std::string& name);
-   void
-   next_expect (event_type, const qname_type& qname);
-   void
-   next_expect (event_type, const std::string& ns, const std::string& name);
-   event_type
-   peek ();
-   // Return the event that was last returned by the call to next() or
-   // peek(). This accessor is const, like the other event data accessors,
-   // so that it can also be called on a const parser.
-   //
-   event_type
-   event () const {return event_;}
-   // Event data.
-   //
- public:
-   const qname_type& qname () const {return *pqname_;}
-   const std::string& namespace_ () const {return pqname_->namespace_ ();}
-   const std::string& name () const {return pqname_->name ();}
-   const std::string& prefix () const {return pqname_->prefix ();}
-   std::string& value () {return *pvalue_;}
-   const std::string& value () const {return *pvalue_;}
-   // Return the value converted to T.
-   // NOTE(review): the conversion is defined outside of this header
-   // section (presumably in parser.txx) -- confirm how failures are
-   // reported.
-   //
-   template <typename T> T value () const;
-   unsigned long long line () const {return line_;}
-   unsigned long long column () const {return column_;}
-   // Attribute map lookup. If attribute is not found, then the version
-   // without the default value throws an appropriate parsing exception
-   // while the version with the default value returns that value.
-   //
-   // Note also that there is no attribute(ns,name) version since it
-   // would conflict with attribute(name,dv) (qualified attributes
-   // are not very common).
-   //
-   // Attribute map is valid throughout the "element level" until
-   // end_element and not just during start_element. As a special case,
-   // the map is still valid after peek() that returned end_element until
-   // this end_element event is retrieved with next().
-   //
-   const std::string&
-   attribute (const std::string& name) const;
-   template <typename T>
-   T
-   attribute (const std::string& name) const;
-   std::string
-   attribute (const std::string& name,
-              const std::string& default_value) const;
-   template <typename T>
-   T
-   attribute (const std::string& name, const T& default_value) const;
-   const std::string&
-   attribute (const qname_type& qname) const;
-   template <typename T>
-   T
-   attribute (const qname_type& qname) const;
-   std::string
-   attribute (const qname_type& qname,
-              const std::string& default_value) const;
-   template <typename T>
-   T
-   attribute (const qname_type& qname, const T& default_value) const;
-   bool
-   attribute_present (const std::string& name) const;
-   bool
-   attribute_present (const qname_type& qname) const;
-   // Low-level attribute map access. Note that this API assumes
-   // all attributes are handled.
-   //
-   struct attribute_value_type
-   {
-     std::string value;
-     // Mutable so that the flag can be changed through a const map.
-     //
-     mutable bool handled;
-   };
-   typedef std::map<qname_type, attribute_value_type> attribute_map_type;
-   const attribute_map_type&
-   attribute_map () const;
-   // Optional content processing.
-   //
- public:
-   // Note that you cannot get/set content while peeking.
-   //
-   void
-   content (content_type);
-   content_type
-   content () const;
-   // Versions that also set the content. Event type must be start_element.
-   //
-   void
-   next_expect (event_type, const std::string& name, content_type);
-   void
-   next_expect (event_type, const qname_type& qname, content_type);
-   void
-   next_expect (event_type,
-                const std::string& ns, const std::string& name,
-                content_type);
-   // Helpers for parsing elements with simple content. The first two
-   // functions assume that start_element has already been parsed. The
-   // rest parse the complete element, from start to end.
-   //
-   // Note also that as with attribute(), there is no (namespace,name)
-   // overload since it would conflict with (name,default_value).
-   //
- public:
-   std::string
-   element ();
-   template <typename T>
-   T
-   element ();
-   std::string
-   element (const std::string& name);
-   std::string
-   element (const qname_type& qname);
-   template <typename T>
-   T
-   element (const std::string& name);
-   template <typename T>
-   T
-   element (const qname_type& qname);
-   std::string
-   element (const std::string& name, const std::string& default_value);
-   std::string
-   element (const qname_type& qname, const std::string& default_value);
-   template <typename T>
-   T
-   element (const std::string& name, const T& default_value);
-   template <typename T>
-   T
-   element (const qname_type& qname, const T& default_value);
-   // C++11 range-based for support. Generally, the iterator interface
-   // doesn't make much sense for the parser so for now we have an
-   // implementation that is just enough for the range-based for.
-   //
- public:
-   struct iterator
-   {
-     typedef event_type value_type;
-     iterator (parser* p = 0, event_type e = eof): p_ (p), e_ (e) {}
-     value_type operator* () const {return e_;}
-     // Advance by pulling the next event from the parser. Note that this
-     // dereferences p_, so an iterator constructed without a parser must
-     // not be incremented.
-     //
-     iterator& operator++ () {e_ = p_->next (); return *this;}
-     // Comparison only makes sense when comparing to end (eof).
-     //
-     bool operator== (iterator y) const {return e_ == eof && y.e_ == eof;}
-     bool operator!= (iterator y) const {return !(*this == y);}
-   private:
-     parser* p_;
-     event_type e_;
-   };
-   // Note that begin() calls next(), that is, obtaining the begin
-   // iterator already retrieves an event from the parser.
-   //
-   iterator begin () {return iterator (this, next ());}
-   iterator end () {return iterator (this, eof);}
- private:
-   // Expat callbacks (XMLCALL calling convention).
-   // NOTE(review): the void* argument is presumably the user data
-   // pointer referring to this parser instance -- confirm in the
-   // implementation.
-   //
-   static void XMLCALL
-   start_element_ (void*, const XML_Char*, const XML_Char**);
-   static void XMLCALL
-   end_element_ (void*, const XML_Char*);
-   static void XMLCALL
-   characters_ (void*, const XML_Char*, int);
-   static void XMLCALL
-   start_namespace_decl_ (void*, const XML_Char*, const XML_Char*);
-   static void XMLCALL
-   end_namespace_decl_ (void*, const XML_Char*);
- private:
-   // Implementation helpers; they are defined outside of this header
-   // section.
-   //
-   void
-   init ();
-   event_type
-   next_ (bool peek);
-   event_type
-   next_body ();
-   void
-   handle_error ();
- private:
-   // If size_ is 0, then data is std::istream. Otherwise, it is a buffer.
-   //
-   union
-   {
-     std::istream* is;
-     const void* buf;
-   } data_;
-   std::size_t size_;
-   const std::string iname_;
-   feature_type feature_;
-   XML_Parser p_;
-   std::size_t depth_;
-   bool accumulate_; // Whether we are accumulating character content.
-   enum {state_next, state_peek} state_;
-   event_type event_;
-   event_type queue_;
-   qname_type qname_;
-   std::string value_;
-   // These are used to avoid copying when we are handling attributes
-   // and namespace decls.
-   //
-   const qname_type* pqname_;
-   std::string* pvalue_;
-   unsigned long long line_;
-   unsigned long long column_;
-   // Attributes as events.
-   //
-   struct attribute_type
-   {
-     qname_type qname;
-     std::string value;
-   };
-   typedef std::vector<attribute_type> attributes;
-   attributes attr_;
-   attributes::size_type attr_i_; // Index of the current attribute.
-   // Namespace declarations.
-   //
-   typedef std::vector<qname_type> namespace_decls;
-   namespace_decls start_ns_;
-   namespace_decls::size_type start_ns_i_; // Index of the current decl.
-   namespace_decls end_ns_;
-   namespace_decls::size_type end_ns_i_; // Index of the current decl.
-   // Element state consisting of the content model and attribute map.
-   //
-   struct element_entry
-   {
-     element_entry (std::size_t d, content_type c = content_type::mixed)
-       : depth (d), content (c), attr_unhandled_ (0) {}
-     std::size_t depth;
-     content_type content;
-     attribute_map_type attr_map_;
-     mutable attribute_map_type::size_type attr_unhandled_;
-   };
-   typedef std::vector<element_entry> element_state;
-   element_state element_state_;
-   // Empty attribute map to return when an element has no attributes.
-   //
-   const attribute_map_type empty_attr_map_;
-   // Return the element entry corresponding to the current depth, if
-   // exists, and NULL otherwise.
-   //
-   const element_entry*
-   get_element () const;
-   // NOTE(review): the difference between get_element() and
-   // get_element_() is not visible in this header -- confirm in the
-   // implementation.
-   //
-   const element_entry*
-   get_element_ () const;
-   void
-   pop_element ();
- };
- // Stream insertion operator for parser events.
- // NOTE(review): presumably writes a textual form of the event; the
- // definition is not part of this header -- confirm.
- //
- LIBSTUDXML_EXPORT std::ostream&
- operator<< (std::ostream& os, parser::event_type e);
- }
- #include <xml/parser.ixx>
- #include <xml/parser.txx>
- #include <xml/details/post.hxx>
- #endif // XML_PARSER
|