parser.cxx 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961
  1. // file : xml/parser.cxx
  2. // copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC
  3. // license : MIT; see accompanying LICENSE file
  4. #include <new> // std::bad_alloc
  5. #include <cassert>
  6. #include <cstring> // std::strchr
  7. #include <istream>
  8. #include <ostream>
  9. #include <sstream>
  10. #include <xml/parser>
  11. using namespace std;
  12. namespace xml
  13. {
  14. // parsing
  15. //
  16. parsing::~parsing () throw () {}
  17. parsing::parsing (const string& n,
  18. unsigned long long l,
  19. unsigned long long c,
  20. const string& d)
  21. : name_ (n), line_ (l), column_ (c), description_ (d)
  22. {
  23. init ();
  24. }
  25. parsing::parsing (const parser& p, const std::string& d)
  26. : name_ (p.input_name ()),
  27. line_ (p.line ()),
  28. column_ (p.column ()),
  29. description_ (d)
  30. {
  31. init ();
  32. }
  33. void parsing::
  34. init ()
  35. {
  36. ostringstream os;
  37. if (!name_.empty ())
  38. os << name_ << ':';
  39. os << line_ << ':' << column_ << ": error: " << description_;
  40. what_ = os.str ();
  41. }
  42. char const* parsing::
  43. what () const throw ()
  44. {
  45. return what_.c_str ();
  46. }
  47. // parser::event_type
  48. //
  // Human-readable event names. The table is indexed directly with a
  // parser::event_type value (see operator<< and next_expect()), so the
  // order of the entries presumably mirrors the enumerator order -- keep
  // the two in sync.
  //
  // Both the characters and the pointers are const: this is a read-only
  // lookup table and nothing should be able to reseat an entry.
  //
  static const char* const parser_event_str[] =
  {
    "start element",
    "end element",
    "start attribute",
    "end attribute",
    "characters",
    "start namespace declaration",
    "end namespace declaration",
    "end of file"
  };
  60. ostream&
  61. operator<< (ostream& os, parser::event_type e)
  62. {
  63. return os << parser_event_str[e];
  64. }
  65. // parser
  66. //
  67. parser::
  68. ~parser ()
  69. {
  70. if (p_ != 0)
  71. XML_ParserFree (p_);
  72. }
  73. void parser::
  74. init ()
  75. {
  76. depth_ = 0;
  77. state_ = state_next;
  78. event_ = eof;
  79. queue_ = eof;
  80. pqname_ = &qname_;
  81. pvalue_ = &value_;
  82. line_ = 0;
  83. column_ = 0;
  84. attr_i_ = 0;
  85. start_ns_i_ = 0;
  86. end_ns_i_ = 0;
  87. if ((feature_ & receive_attributes_map) != 0 &&
  88. (feature_ & receive_attributes_event) != 0)
  89. feature_ &= ~receive_attributes_map;
  90. // Allocate the parser. Make sure nothing else can throw after
  91. // this call since otherwise we will leak it.
  92. //
  93. p_ = XML_ParserCreateNS (0, XML_Char (' '));
  94. if (p_ == 0)
  95. throw bad_alloc ();
  96. // Get prefixes in addition to namespaces and local names.
  97. //
  98. XML_SetReturnNSTriplet (p_, true);
  99. // Set handlers.
  100. //
  101. XML_SetUserData(p_, this);
  102. if ((feature_ & receive_elements) != 0)
  103. {
  104. XML_SetStartElementHandler (p_, &start_element_);
  105. XML_SetEndElementHandler (p_, &end_element_);
  106. }
  107. if ((feature_ & receive_characters) != 0)
  108. XML_SetCharacterDataHandler (p_, &characters_);
  109. if ((feature_ & receive_namespace_decls) != 0)
  110. XML_SetNamespaceDeclHandler (p_,
  111. &start_namespace_decl_,
  112. &end_namespace_decl_);
  113. }
  114. void parser::
  115. handle_error ()
  116. {
  117. XML_Error e (XML_GetErrorCode (p_));
  118. if (e == XML_ERROR_ABORTED)
  119. {
  120. // For now we only abort the parser in the characters_() and
  121. // start_element_() handlers.
  122. //
  123. switch (content ())
  124. {
  125. case content_type::empty:
  126. throw parsing (*this, "characters in empty content");
  127. case content_type::simple:
  128. throw parsing (*this, "element in simple content");
  129. case content_type::complex:
  130. throw parsing (*this, "characters in complex content");
  131. default:
  132. assert (false);
  133. }
  134. }
  135. else
  136. throw parsing (iname_,
  137. XML_GetCurrentLineNumber (p_),
  138. XML_GetCurrentColumnNumber (p_),
  139. XML_ErrorString (e));
  140. }
  // Scope guard that turns off failbit exceptions on a stream for the
  // lifetime of the guard and restores the original exception mask on
  // destruction.
  //
  // On destruction, if the stream is in the fail state together with
  // eof, the failbit is cleared before the mask is restored. If the
  // stream's error state (ignoring failbit) intersects the original
  // exception mask, then nothing is touched at all (see the destructor).
  //
  struct stream_exception_controller
  {
    // Remember the current exception mask and disable failbit in it.
    //
    stream_exception_controller (std::istream& is)
        : is_ (is), old_state_ (is_.exceptions ())
    {
      is_.exceptions (old_state_ & ~std::istream::failbit);
    }

    ~stream_exception_controller ()
    {
      // Current error state sans failbit.
      //
      const std::istream::iostate state (
        is_.rdstate () & ~std::istream::failbit);

      // If our error state (sans failbit) intersects with the
      // exception state then that means we have an active
      // exception and changing error/exception state will
      // cause another to be thrown.
      //
      if (old_state_ & state)
        return;

      // Clear failbit if it was caused by eof.
      //
      if (is_.fail () && is_.eof ())
        is_.clear (state);

      is_.exceptions (old_state_);
    }

  private:
    // Non-copyable (declared but not defined).
    //
    stream_exception_controller (const stream_exception_controller&);

    stream_exception_controller&
    operator= (const stream_exception_controller&);

  private:
    std::istream& is_;
    std::istream::iostate old_state_;
  };
  174. parser::event_type parser::
  175. next ()
  176. {
  177. if (state_ == state_next)
  178. return next_ (false);
  179. else
  180. {
  181. // If we previously peeked at start/end_element, then adjust
  182. // state accordingly.
  183. //
  184. switch (event_)
  185. {
  186. case end_element:
  187. {
  188. if (!element_state_.empty () &&
  189. element_state_.back ().depth == depth_)
  190. pop_element ();
  191. depth_--;
  192. break;
  193. }
  194. case start_element:
  195. {
  196. depth_++;
  197. break;
  198. }
  199. default:
  200. break;
  201. }
  202. state_ = state_next;
  203. return event_;
  204. }
  205. }
  206. const string& parser::
  207. attribute (const qname_type& qn) const
  208. {
  209. if (const element_entry* e = get_element ())
  210. {
  211. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  212. if (i != e->attr_map_.end ())
  213. {
  214. if (!i->second.handled)
  215. {
  216. i->second.handled = true;
  217. e->attr_unhandled_--;
  218. }
  219. return i->second.value;
  220. }
  221. }
  222. throw parsing (*this, "attribute '" + qn.string () + "' expected");
  223. }
  224. string parser::
  225. attribute (const qname_type& qn, const string& dv) const
  226. {
  227. if (const element_entry* e = get_element ())
  228. {
  229. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  230. if (i != e->attr_map_.end ())
  231. {
  232. if (!i->second.handled)
  233. {
  234. i->second.handled = true;
  235. e->attr_unhandled_--;
  236. }
  237. return i->second.value;
  238. }
  239. }
  240. return dv;
  241. }
  242. bool parser::
  243. attribute_present (const qname_type& qn) const
  244. {
  245. if (const element_entry* e = get_element ())
  246. {
  247. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  248. if (i != e->attr_map_.end ())
  249. {
  250. if (!i->second.handled)
  251. {
  252. i->second.handled = true;
  253. e->attr_unhandled_--;
  254. }
  255. return true;
  256. }
  257. }
  258. return false;
  259. }
  260. void parser::
  261. next_expect (event_type e)
  262. {
  263. if (next () != e)
  264. throw parsing (*this, string (parser_event_str[e]) + " expected");
  265. }
  266. void parser::
  267. next_expect (event_type e, const string& ns, const string& n)
  268. {
  269. if (next () != e || namespace_ () != ns || name () != n)
  270. throw parsing (*this,
  271. string (parser_event_str[e]) + " '" +
  272. qname_type (ns, n).string () + "' expected");
  273. }
  274. string parser::
  275. element ()
  276. {
  277. content (content_type::simple);
  278. string r;
  279. // The content of the element can be empty in which case there
  280. // will be no characters event.
  281. //
  282. event_type e (next ());
  283. if (e == characters)
  284. {
  285. r.swap (value ());
  286. e = next ();
  287. }
  288. // We cannot really get anything other than end_element since
  289. // the simple content validation won't allow it.
  290. //
  291. assert (e == end_element);
  292. return r;
  293. }
  294. string parser::
  295. element (const qname_type& qn, const string& dv)
  296. {
  297. if (peek () == start_element && qname () == qn)
  298. {
  299. next ();
  300. return element ();
  301. }
  302. return dv;
  303. }
  304. const parser::element_entry* parser::
  305. get_element_ () const
  306. {
  307. // The start_element_() Expat handler may have already provisioned
  308. // an entry in the element stack. In this case, we need to get the
  309. // one before it, if any.
  310. //
  311. const element_entry* r (0);
  312. element_state::size_type n (element_state_.size () - 1);
  313. if (element_state_[n].depth == depth_)
  314. r = &element_state_[n];
  315. else if (n != 0 && element_state_[n].depth > depth_)
  316. {
  317. n--;
  318. if (element_state_[n].depth == depth_)
  319. r = &element_state_[n];
  320. }
  321. return r;
  322. }
  323. void parser::
  324. pop_element ()
  325. {
  326. // Make sure there are no unhandled attributes left.
  327. //
  328. const element_entry& e (element_state_.back ());
  329. if (e.attr_unhandled_ != 0)
  330. {
  331. // Find the first unhandled attribute and report it.
  332. //
  333. for (attribute_map_type::const_iterator i (e.attr_map_.begin ());
  334. i != e.attr_map_.end (); ++i)
  335. {
  336. if (!i->second.handled)
  337. throw parsing (
  338. *this, "unexpected attribute '" + i->first.string () + "'");
  339. }
  340. assert (false);
  341. }
  342. element_state_.pop_back ();
  343. }
  344. parser::event_type parser::
  345. next_ (bool peek)
  346. {
  347. event_type e (next_body ());
  348. // Content-specific processing. Note that we handle characters in the
  349. // characters_() Expat handler for two reasons. Firstly, it is faster
  350. // to ignore the whitespaces at the source. Secondly, this allows us
  351. // to distinguish between element and attribute characters. We can
  352. // move this processing to the handler because the characters event
  353. // is never queued.
  354. //
  355. switch (e)
  356. {
  357. case end_element:
  358. {
  359. // If this is a peek, then avoid popping the stack just yet.
  360. // This way, the attribute map will still be valid until we
  361. // call next().
  362. //
  363. if (!peek)
  364. {
  365. if (!element_state_.empty () &&
  366. element_state_.back ().depth == depth_)
  367. pop_element ();
  368. depth_--;
  369. }
  370. break;
  371. }
  372. case start_element:
  373. {
  374. if (const element_entry* e = get_element ())
  375. {
  376. switch (e->content)
  377. {
  378. case content_type::empty:
  379. throw parsing (*this, "element in empty content");
  380. case content_type::simple:
  381. throw parsing (*this, "element in simple content");
  382. default:
  383. break;
  384. }
  385. }
  386. // If this is a peek, then delay adjusting the depth.
  387. //
  388. if (!peek)
  389. depth_++;
  390. break;
  391. }
  392. default:
  393. break;
  394. }
  395. return e;
  396. }
  397. parser::event_type parser::
  398. next_body ()
  399. {
  400. // See if we have any start namespace declarations we need to return.
  401. //
  402. if (start_ns_i_ < start_ns_.size ())
  403. {
  404. // Based on the previous event determine what's the next one must be.
  405. //
  406. switch (event_)
  407. {
  408. case start_namespace_decl:
  409. {
  410. if (++start_ns_i_ == start_ns_.size ())
  411. {
  412. start_ns_i_ = 0;
  413. start_ns_.clear ();
  414. pqname_ = &qname_;
  415. break; // No more declarations.
  416. }
  417. // Fall through.
  418. }
  419. case start_element:
  420. {
  421. event_ = start_namespace_decl;
  422. pqname_ = &start_ns_[start_ns_i_];
  423. return event_;
  424. }
  425. default:
  426. {
  427. assert (false);
  428. return event_ = eof;
  429. }
  430. }
  431. }
  432. // See if we have any attributes we need to return as events.
  433. //
  434. if (attr_i_ < attr_.size ())
  435. {
  436. // Based on the previous event determine what's the next one must be.
  437. //
  438. switch (event_)
  439. {
  440. case start_attribute:
  441. {
  442. event_ = characters;
  443. pvalue_ = &attr_[attr_i_].value;
  444. return event_;
  445. }
  446. case characters:
  447. {
  448. event_ = end_attribute; // Name is already set.
  449. return event_;
  450. }
  451. case end_attribute:
  452. {
  453. if (++attr_i_ == attr_.size ())
  454. {
  455. attr_i_ = 0;
  456. attr_.clear ();
  457. pqname_ = &qname_;
  458. pvalue_ = &value_;
  459. break; // No more attributes.
  460. }
  461. // Fall through.
  462. }
  463. case start_element:
  464. case start_namespace_decl:
  465. {
  466. event_ = start_attribute;
  467. pqname_ = &attr_[attr_i_].qname;
  468. return event_;
  469. }
  470. default:
  471. {
  472. assert (false);
  473. return event_ = eof;
  474. }
  475. }
  476. }
  477. // See if we have any end namespace declarations we need to return.
  478. //
  479. if (end_ns_i_ < end_ns_.size ())
  480. {
  481. // Based on the previous event determine what's the next one must be.
  482. //
  483. switch (event_)
  484. {
  485. case end_namespace_decl:
  486. {
  487. if (++end_ns_i_ == end_ns_.size ())
  488. {
  489. end_ns_i_ = 0;
  490. end_ns_.clear ();
  491. pqname_ = &qname_;
  492. break; // No more declarations.
  493. }
  494. // Fall through.
  495. }
  496. // The end namespace declaration comes before the end element
  497. // which means it can follow pretty much any other event.
  498. //
  499. default:
  500. {
  501. event_ = end_namespace_decl;
  502. pqname_ = &end_ns_[end_ns_i_];
  503. return event_;
  504. }
  505. }
  506. }
  507. // Check the queue.
  508. //
  509. if (queue_ != eof)
  510. {
  511. event_ = queue_;
  512. queue_ = eof;
  513. line_ = XML_GetCurrentLineNumber (p_);
  514. column_ = XML_GetCurrentColumnNumber (p_);
  515. return event_;
  516. }
  517. // Reset the character accumulation flag.
  518. //
  519. accumulate_ = false;
  520. XML_ParsingStatus ps;
  521. XML_GetParsingStatus (p_, &ps);
  522. switch (ps.parsing)
  523. {
  524. case XML_INITIALIZED:
  525. {
  526. // As if we finished the previous chunk.
  527. break;
  528. }
  529. case XML_PARSING:
  530. {
  531. assert (false);
  532. return event_ = eof;
  533. }
  534. case XML_FINISHED:
  535. {
  536. return event_ = eof;
  537. }
  538. case XML_SUSPENDED:
  539. {
  540. switch (XML_ResumeParser (p_))
  541. {
  542. case XML_STATUS_SUSPENDED:
  543. {
  544. // If the parser is again in the suspended state, then
  545. // that means we have the next event.
  546. //
  547. return event_;
  548. }
  549. case XML_STATUS_OK:
  550. {
  551. // Otherwise, we need to get and parse the next chunk of data
  552. // unless this was the last chunk, in which case this is eof.
  553. //
  554. if (ps.finalBuffer)
  555. return event_ = eof;
  556. break;
  557. }
  558. case XML_STATUS_ERROR:
  559. handle_error ();
  560. }
  561. break;
  562. }
  563. }
  564. // Get and parse the next chunk of data until we get the next event
  565. // or reach eof.
  566. //
  567. if (!accumulate_)
  568. event_ = eof;
  569. XML_Status s;
  570. do
  571. {
  572. if (size_ != 0)
  573. {
  574. s = XML_Parse (p_,
  575. static_cast <const char*> (data_.buf),
  576. static_cast <int> (size_),
  577. true);
  578. if (s == XML_STATUS_ERROR)
  579. handle_error ();
  580. break;
  581. }
  582. else
  583. {
  584. const size_t cap (4096);
  585. char* b (static_cast<char*> (XML_GetBuffer (p_, cap)));
  586. if (b == 0)
  587. throw bad_alloc ();
  588. // Temporarily unset the exception failbit. Also clear the fail bit
  589. // when we reset the old state if it was caused by eof.
  590. //
  591. istream& is (*data_.is);
  592. {
  593. stream_exception_controller sec (is);
  594. is.read (b, static_cast<streamsize> (cap));
  595. }
  596. // If the caller hasn't configured the stream to use exceptions,
  597. // then use the parsing exception to report an error.
  598. //
  599. if (is.bad () || (is.fail () && !is.eof ()))
  600. throw parsing (*this, "io failure");
  601. bool eof (is.eof ());
  602. s = XML_ParseBuffer (p_, static_cast<int> (is.gcount ()), eof);
  603. if (s == XML_STATUS_ERROR)
  604. handle_error ();
  605. if (eof)
  606. break;
  607. }
  608. } while (s != XML_STATUS_SUSPENDED);
  609. return event_;
  610. }
  611. static void
  612. split_name (const XML_Char* s, qname& qn)
  613. {
  614. string& ns (qn.namespace_ ());
  615. string& name (qn.name ());
  616. string& prefix (qn.prefix ());
  617. const char* p (strchr (s, ' '));
  618. if (p == 0)
  619. {
  620. ns.clear ();
  621. name = s;
  622. prefix.clear ();
  623. }
  624. else
  625. {
  626. ns.assign (s, 0, p - s);
  627. s = p + 1;
  628. p = strchr (s, ' ');
  629. if (p == 0)
  630. {
  631. name = s;
  632. prefix.clear ();
  633. }
  634. else
  635. {
  636. name.assign (s, 0, p - s);
  637. prefix = p + 1;
  638. }
  639. }
  640. }
  641. void XMLCALL parser::start_element_ (void* v, const XML_Char* name, const XML_Char** atts)
  642. {
  643. parser& p (*static_cast<parser*> (v));
  644. XML_ParsingStatus ps;
  645. XML_GetParsingStatus (p.p_, &ps);
  646. // Expat has a (mis)-feature of a possibily calling handlers even
  647. // after the non-resumable XML_StopParser call.
  648. //
  649. if (ps.parsing == XML_FINISHED)
  650. return;
  651. // Cannot be a followup event.
  652. //
  653. assert (ps.parsing == XML_PARSING);
  654. // When accumulating characters in simple content, we expect to
  655. // see more characters or end element. Seeing start element is
  656. // possible but means violation of the content model.
  657. //
  658. if (p.accumulate_)
  659. {
  660. // It would have been easier to throw the exception directly,
  661. // however, the Expat code is most likely not exception safe.
  662. //
  663. p.line_ = XML_GetCurrentLineNumber (p.p_);
  664. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  665. XML_StopParser (p.p_, false);
  666. return;
  667. }
  668. p.event_ = start_element;
  669. split_name (name, p.qname_);
  670. p.line_ = XML_GetCurrentLineNumber (p.p_);
  671. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  672. // Handle attributes.
  673. //
  674. if (*atts != 0)
  675. {
  676. bool am ((p.feature_ & receive_attributes_map) != 0);
  677. bool ae ((p.feature_ & receive_attributes_event) != 0);
  678. // Provision an entry for this element.
  679. //
  680. element_entry* pe (0);
  681. if (am)
  682. {
  683. p.element_state_.push_back (element_entry (p.depth_ + 1));
  684. pe = &p.element_state_.back ();
  685. }
  686. if (am || ae)
  687. {
  688. for (; *atts != 0; atts += 2)
  689. {
  690. if (am)
  691. {
  692. qname_type qn;
  693. split_name (*atts, qn);
  694. attribute_map_type::value_type v (qn, attribute_value_type ());
  695. v.second.value = *(atts + 1);
  696. v.second.handled = false;
  697. pe->attr_map_.insert (v);
  698. }
  699. else
  700. {
  701. p.attr_.push_back (attribute_type ());
  702. split_name (*atts, p.attr_.back ().qname);
  703. p.attr_.back ().value = *(atts + 1);
  704. }
  705. }
  706. if (am)
  707. pe->attr_unhandled_ = pe->attr_map_.size ();
  708. }
  709. }
  710. XML_StopParser (p.p_, true);
  711. }
  712. void XMLCALL parser::end_element_ (void* v, const XML_Char* name)
  713. {
  714. parser& p (*static_cast<parser*> (v));
  715. XML_ParsingStatus ps;
  716. XML_GetParsingStatus (p.p_, &ps);
  717. // Expat has a (mis)-feature of a possibily calling handlers even
  718. // after the non-resumable XML_StopParser call.
  719. //
  720. if (ps.parsing == XML_FINISHED)
  721. return;
  722. // This can be a followup event for empty elements (<foo/>). In this
  723. // case the element name is already set.
  724. //
  725. if (ps.parsing != XML_PARSING)
  726. p.queue_ = end_element;
  727. else
  728. {
  729. split_name (name, p.qname_);
  730. // If we are accumulating characters, then queue this event.
  731. //
  732. if (p.accumulate_)
  733. p.queue_ = end_element;
  734. else
  735. {
  736. p.event_ = end_element;
  737. p.line_ = XML_GetCurrentLineNumber (p.p_);
  738. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  739. }
  740. XML_StopParser (p.p_, true);
  741. }
  742. }
  743. void XMLCALL parser::
  744. characters_ (void* v, const XML_Char* s, int n)
  745. {
  746. parser& p (*static_cast<parser*> (v));
  747. XML_ParsingStatus ps;
  748. XML_GetParsingStatus (p.p_, &ps);
  749. // Expat has a (mis)-feature of a possibily calling handlers even
  750. // after the non-resumable XML_StopParser call.
  751. //
  752. if (ps.parsing == XML_FINISHED)
  753. return;
  754. content_type cont (p.content ());
  755. // If this is empty or complex content, see if these are whitespaces.
  756. //
  757. switch (cont)
  758. {
  759. case content_type::empty:
  760. case content_type::complex:
  761. {
  762. for (int i (0); i != n; ++i)
  763. {
  764. char c (s[i]);
  765. if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09)
  766. continue;
  767. // It would have been easier to throw the exception directly,
  768. // however, the Expat code is most likely not exception safe.
  769. //
  770. p.line_ = XML_GetCurrentLineNumber (p.p_);
  771. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  772. XML_StopParser (p.p_, false);
  773. break;
  774. }
  775. return;
  776. }
  777. default:
  778. break;
  779. }
  780. // Append the characters if we are accumulating. This can also be a
  781. // followup event for another character event. In this case also
  782. // append the data.
  783. //
  784. if (p.accumulate_ || ps.parsing != XML_PARSING)
  785. {
  786. assert (p.event_ == characters);
  787. p.value_.append (s, n);
  788. }
  789. else
  790. {
  791. p.event_ = characters;
  792. p.value_.assign (s, n);
  793. p.line_ = XML_GetCurrentLineNumber (p.p_);
  794. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  795. // In simple content we need to accumulate all the characters
  796. // into a single event. To do this we will let the parser run
  797. // until we reach the end of the element.
  798. //
  799. if (cont == content_type::simple)
  800. p.accumulate_ = true;
  801. else
  802. XML_StopParser (p.p_, true);
  803. }
  804. }
  805. void XMLCALL parser::
  806. start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns)
  807. {
  808. parser& p (*static_cast<parser*> (v));
  809. XML_ParsingStatus ps;
  810. XML_GetParsingStatus (p.p_, &ps);
  811. // Expat has a (mis)-feature of a possibily calling handlers even
  812. // after the non-resumable XML_StopParser call.
  813. //
  814. if (ps.parsing == XML_FINISHED)
  815. return;
  816. p.start_ns_.push_back (qname_type ());
  817. p.start_ns_.back ().prefix () = (prefix != 0 ? prefix : "");
  818. p.start_ns_.back ().namespace_ () = (ns != 0 ? ns : "");
  819. }
  820. void XMLCALL parser::
  821. end_namespace_decl_ (void* v, const XML_Char* prefix)
  822. {
  823. parser& p (*static_cast<parser*> (v));
  824. XML_ParsingStatus ps;
  825. XML_GetParsingStatus (p.p_, &ps);
  826. // Expat has a (mis)-feature of a possibily calling handlers even
  827. // after the non-resumable XML_StopParser call.
  828. //
  829. if (ps.parsing == XML_FINISHED)
  830. return;
  831. p.end_ns_.push_back (qname_type ());
  832. p.end_ns_.back ().prefix () = (prefix != 0 ? prefix : "");
  833. }
  834. }