parser.cxx 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. // file : xml/parser.cxx
  2. // copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC
  3. // license : MIT; see accompanying LICENSE file
  4. #include <new> // std::bad_alloc
  5. #include <cassert>
  6. #include <cstring> // std::strchr
  7. #include <istream>
  8. #include <ostream>
  9. #include <sstream>
  10. #include <xml/parser>
  11. using namespace std;
  12. namespace xml
  13. {
  14. // parsing
  15. //
  16. parsing::
  17. ~parsing () throw () {}
  18. parsing::
  19. parsing (const string& n,
  20. unsigned long long l,
  21. unsigned long long c,
  22. const string& d)
  23. : name_ (n), line_ (l), column_ (c), description_ (d)
  24. {
  25. init ();
  26. }
  27. parsing::
  28. parsing (const parser& p, const std::string& d)
  29. : name_ (p.input_name ()),
  30. line_ (p.line ()),
  31. column_ (p.column ()),
  32. description_ (d)
  33. {
  34. init ();
  35. }
  36. void parsing::
  37. init ()
  38. {
  39. ostringstream os;
  40. if (!name_.empty ())
  41. os << name_ << ':';
  42. os << line_ << ':' << column_ << ": error: " << description_;
  43. what_ = os.str ();
  44. }
  45. char const* parsing::
  46. what () const throw ()
  47. {
  48. return what_.c_str ();
  49. }
  50. // parser::event_type
  51. //
  // Human-readable event names, indexed by the parser::event_type
  // value (see operator<< and next_expect()).
  //
  // NOTE(review): the order presumably mirrors the enumerator order in
  // the header; confirm when adding or reordering events.
  //
  static const char* parser_event_str[] =
  {
    "start element",               // start_element
    "end element",                 // end_element
    "start attribute",             // start_attribute
    "end attribute",               // end_attribute
    "characters",                  // characters
    "start namespace declaration", // start_namespace_decl
    "end namespace declaration",   // end_namespace_decl
    "end of file"                  // eof
  };
  63. ostream&
  64. operator<< (ostream& os, parser::event_type e)
  65. {
  66. return os << parser_event_str[e];
  67. }
  68. // parser
  69. //
  70. parser::
  71. ~parser ()
  72. {
  73. if (p_ != 0)
  74. XML_ParserFree (p_);
  75. }
  76. void parser::
  77. init ()
  78. {
  79. depth_ = 0;
  80. state_ = state_next;
  81. event_ = eof;
  82. queue_ = eof;
  83. pqname_ = &qname_;
  84. pvalue_ = &value_;
  85. line_ = 0;
  86. column_ = 0;
  87. attr_i_ = 0;
  88. start_ns_i_ = 0;
  89. end_ns_i_ = 0;
  90. if ((feature_ & receive_attributes_map) != 0 &&
  91. (feature_ & receive_attributes_event) != 0)
  92. feature_ &= ~receive_attributes_map;
  93. // Allocate the parser. Make sure nothing else can throw after
  94. // this call since otherwise we will leak it.
  95. //
  96. p_ = XML_ParserCreateNS (0, XML_Char (' '));
  97. if (p_ == 0)
  98. throw bad_alloc ();
  99. // Get prefixes in addition to namespaces and local names.
  100. //
  101. XML_SetReturnNSTriplet (p_, true);
  102. // Set handlers.
  103. //
  104. XML_SetUserData(p_, this);
  105. if ((feature_ & receive_elements) != 0)
  106. {
  107. XML_SetStartElementHandler (p_, &start_element_);
  108. XML_SetEndElementHandler (p_, &end_element_);
  109. }
  110. if ((feature_ & receive_characters) != 0)
  111. XML_SetCharacterDataHandler (p_, &characters_);
  112. if ((feature_ & receive_namespace_decls) != 0)
  113. XML_SetNamespaceDeclHandler (p_,
  114. &start_namespace_decl_,
  115. &end_namespace_decl_);
  116. }
  117. void parser::
  118. handle_error ()
  119. {
  120. XML_Error e (XML_GetErrorCode (p_));
  121. if (e == XML_ERROR_ABORTED)
  122. {
  123. // For now we only abort the parser in the characters_() and
  124. // start_element_() handlers.
  125. //
  126. switch (content ())
  127. {
  128. case content_type::empty:
  129. throw parsing (*this, "characters in empty content");
  130. case content_type::simple:
  131. throw parsing (*this, "element in simple content");
  132. case content_type::complex:
  133. throw parsing (*this, "characters in complex content");
  134. default:
  135. assert (false);
  136. }
  137. }
  138. else
  139. throw parsing (iname_,
  140. XML_GetCurrentLineNumber (p_),
  141. XML_GetCurrentColumnNumber (p_),
  142. XML_ErrorString (e));
  143. }
  // Scope guard that removes failbit from the stream's exception mask
  // on construction and restores the original mask on destruction.
  // While restoring, failbit is also cleared from the stream state if
  // it is set together with eofbit.
  //
  struct stream_exception_controller
  {
    stream_exception_controller (std::istream& is)
        : is_ (is), old_state_ (is_.exceptions ())
    {
      is_.exceptions (old_state_ & ~std::istream::failbit);
    }

    ~stream_exception_controller ()
    {
      // The current error state without failbit.
      //
      const std::istream::iostate sans_fail (
        is_.rdstate () & ~std::istream::failbit);

      // If this state intersects with the original exception mask,
      // then there is an active exception and changing the error or
      // exception state would cause another one to be thrown. Leave
      // the stream alone in this case.
      //
      if (old_state_ & sans_fail)
        return;

      // Drop failbit if it was caused by reaching eof.
      //
      if (is_.fail () && is_.eof ())
        is_.clear (sans_fail);

      is_.exceptions (old_state_);
    }

  private:
    // Non-copyable (declared but not defined).
    //
    stream_exception_controller (const stream_exception_controller&);

    stream_exception_controller&
    operator= (const stream_exception_controller&);

  private:
    std::istream& is_;
    std::istream::iostate old_state_;
  };
  177. parser::event_type parser::
  178. next ()
  179. {
  180. if (state_ == state_next)
  181. return next_ (false);
  182. else
  183. {
  184. // If we previously peeked at start/end_element, then adjust
  185. // state accordingly.
  186. //
  187. switch (event_)
  188. {
  189. case end_element:
  190. {
  191. if (!element_state_.empty () &&
  192. element_state_.back ().depth == depth_)
  193. pop_element ();
  194. depth_--;
  195. break;
  196. }
  197. case start_element:
  198. {
  199. depth_++;
  200. break;
  201. }
  202. default:
  203. break;
  204. }
  205. state_ = state_next;
  206. return event_;
  207. }
  208. }
  209. const string& parser::
  210. attribute (const qname_type& qn) const
  211. {
  212. if (const element_entry* e = get_element ())
  213. {
  214. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  215. if (i != e->attr_map_.end ())
  216. {
  217. if (!i->second.handled)
  218. {
  219. i->second.handled = true;
  220. e->attr_unhandled_--;
  221. }
  222. return i->second.value;
  223. }
  224. }
  225. throw parsing (*this, "attribute '" + qn.string () + "' expected");
  226. }
  227. string parser::
  228. attribute (const qname_type& qn, const string& dv) const
  229. {
  230. if (const element_entry* e = get_element ())
  231. {
  232. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  233. if (i != e->attr_map_.end ())
  234. {
  235. if (!i->second.handled)
  236. {
  237. i->second.handled = true;
  238. e->attr_unhandled_--;
  239. }
  240. return i->second.value;
  241. }
  242. }
  243. return dv;
  244. }
  245. bool parser::
  246. attribute_present (const qname_type& qn) const
  247. {
  248. if (const element_entry* e = get_element ())
  249. {
  250. attribute_map_type::const_iterator i (e->attr_map_.find (qn));
  251. if (i != e->attr_map_.end ())
  252. {
  253. if (!i->second.handled)
  254. {
  255. i->second.handled = true;
  256. e->attr_unhandled_--;
  257. }
  258. return true;
  259. }
  260. }
  261. return false;
  262. }
  263. void parser::
  264. next_expect (event_type e)
  265. {
  266. if (next () != e)
  267. throw parsing (*this, string (parser_event_str[e]) + " expected");
  268. }
  269. void parser::
  270. next_expect (event_type e, const string& ns, const string& n)
  271. {
  272. if (next () != e || namespace_ () != ns || name () != n)
  273. throw parsing (*this,
  274. string (parser_event_str[e]) + " '" +
  275. qname_type (ns, n).string () + "' expected");
  276. }
  277. string parser::
  278. element ()
  279. {
  280. content (content_type::simple);
  281. string r;
  282. // The content of the element can be empty in which case there
  283. // will be no characters event.
  284. //
  285. event_type e (next ());
  286. if (e == characters)
  287. {
  288. r.swap (value ());
  289. e = next ();
  290. }
  291. // We cannot really get anything other than end_element since
  292. // the simple content validation won't allow it.
  293. //
  294. assert (e == end_element);
  295. return r;
  296. }
  297. string parser::
  298. element (const qname_type& qn, const string& dv)
  299. {
  300. if (peek () == start_element && qname () == qn)
  301. {
  302. next ();
  303. return element ();
  304. }
  305. return dv;
  306. }
  307. const parser::element_entry* parser::
  308. get_element_ () const
  309. {
  310. // The start_element_() Expat handler may have already provisioned
  311. // an entry in the element stack. In this case, we need to get the
  312. // one before it, if any.
  313. //
  314. const element_entry* r (0);
  315. element_state::size_type n (element_state_.size () - 1);
  316. if (element_state_[n].depth == depth_)
  317. r = &element_state_[n];
  318. else if (n != 0 && element_state_[n].depth > depth_)
  319. {
  320. n--;
  321. if (element_state_[n].depth == depth_)
  322. r = &element_state_[n];
  323. }
  324. return r;
  325. }
  326. void parser::
  327. pop_element ()
  328. {
  329. // Make sure there are no unhandled attributes left.
  330. //
  331. const element_entry& e (element_state_.back ());
  332. if (e.attr_unhandled_ != 0)
  333. {
  334. // Find the first unhandled attribute and report it.
  335. //
  336. for (attribute_map_type::const_iterator i (e.attr_map_.begin ());
  337. i != e.attr_map_.end (); ++i)
  338. {
  339. if (!i->second.handled)
  340. throw parsing (
  341. *this, "unexpected attribute '" + i->first.string () + "'");
  342. }
  343. assert (false);
  344. }
  345. element_state_.pop_back ();
  346. }
  347. parser::event_type parser::
  348. next_ (bool peek)
  349. {
  350. event_type e (next_body ());
  351. // Content-specific processing. Note that we handle characters in the
  352. // characters_() Expat handler for two reasons. Firstly, it is faster
  353. // to ignore the whitespaces at the source. Secondly, this allows us
  354. // to distinguish between element and attribute characters. We can
  355. // move this processing to the handler because the characters event
  356. // is never queued.
  357. //
  358. switch (e)
  359. {
  360. case end_element:
  361. {
  362. // If this is a peek, then avoid popping the stack just yet.
  363. // This way, the attribute map will still be valid until we
  364. // call next().
  365. //
  366. if (!peek)
  367. {
  368. if (!element_state_.empty () &&
  369. element_state_.back ().depth == depth_)
  370. pop_element ();
  371. depth_--;
  372. }
  373. break;
  374. }
  375. case start_element:
  376. {
  377. if (const element_entry* e = get_element ())
  378. {
  379. switch (e->content)
  380. {
  381. case content_type::empty:
  382. throw parsing (*this, "element in empty content");
  383. case content_type::simple:
  384. throw parsing (*this, "element in simple content");
  385. default:
  386. break;
  387. }
  388. }
  389. // If this is a peek, then delay adjusting the depth.
  390. //
  391. if (!peek)
  392. depth_++;
  393. break;
  394. }
  395. default:
  396. break;
  397. }
  398. return e;
  399. }
  400. parser::event_type parser::
  401. next_body ()
  402. {
  403. // See if we have any start namespace declarations we need to return.
  404. //
  405. if (start_ns_i_ < start_ns_.size ())
  406. {
  407. // Based on the previous event determine what's the next one must be.
  408. //
  409. switch (event_)
  410. {
  411. case start_namespace_decl:
  412. {
  413. if (++start_ns_i_ == start_ns_.size ())
  414. {
  415. start_ns_i_ = 0;
  416. start_ns_.clear ();
  417. pqname_ = &qname_;
  418. break; // No more declarations.
  419. }
  420. // Fall through.
  421. }
  422. case start_element:
  423. {
  424. event_ = start_namespace_decl;
  425. pqname_ = &start_ns_[start_ns_i_];
  426. return event_;
  427. }
  428. default:
  429. {
  430. assert (false);
  431. return event_ = eof;
  432. }
  433. }
  434. }
  435. // See if we have any attributes we need to return as events.
  436. //
  437. if (attr_i_ < attr_.size ())
  438. {
  439. // Based on the previous event determine what's the next one must be.
  440. //
  441. switch (event_)
  442. {
  443. case start_attribute:
  444. {
  445. event_ = characters;
  446. pvalue_ = &attr_[attr_i_].value;
  447. return event_;
  448. }
  449. case characters:
  450. {
  451. event_ = end_attribute; // Name is already set.
  452. return event_;
  453. }
  454. case end_attribute:
  455. {
  456. if (++attr_i_ == attr_.size ())
  457. {
  458. attr_i_ = 0;
  459. attr_.clear ();
  460. pqname_ = &qname_;
  461. pvalue_ = &value_;
  462. break; // No more attributes.
  463. }
  464. // Fall through.
  465. }
  466. case start_element:
  467. case start_namespace_decl:
  468. {
  469. event_ = start_attribute;
  470. pqname_ = &attr_[attr_i_].qname;
  471. return event_;
  472. }
  473. default:
  474. {
  475. assert (false);
  476. return event_ = eof;
  477. }
  478. }
  479. }
  480. // See if we have any end namespace declarations we need to return.
  481. //
  482. if (end_ns_i_ < end_ns_.size ())
  483. {
  484. // Based on the previous event determine what's the next one must be.
  485. //
  486. switch (event_)
  487. {
  488. case end_namespace_decl:
  489. {
  490. if (++end_ns_i_ == end_ns_.size ())
  491. {
  492. end_ns_i_ = 0;
  493. end_ns_.clear ();
  494. pqname_ = &qname_;
  495. break; // No more declarations.
  496. }
  497. // Fall through.
  498. }
  499. // The end namespace declaration comes before the end element
  500. // which means it can follow pretty much any other event.
  501. //
  502. default:
  503. {
  504. event_ = end_namespace_decl;
  505. pqname_ = &end_ns_[end_ns_i_];
  506. return event_;
  507. }
  508. }
  509. }
  510. // Check the queue.
  511. //
  512. if (queue_ != eof)
  513. {
  514. event_ = queue_;
  515. queue_ = eof;
  516. line_ = XML_GetCurrentLineNumber (p_);
  517. column_ = XML_GetCurrentColumnNumber (p_);
  518. return event_;
  519. }
  520. // Reset the character accumulation flag.
  521. //
  522. accumulate_ = false;
  523. XML_ParsingStatus ps;
  524. XML_GetParsingStatus (p_, &ps);
  525. switch (ps.parsing)
  526. {
  527. case XML_INITIALIZED:
  528. {
  529. // As if we finished the previous chunk.
  530. break;
  531. }
  532. case XML_PARSING:
  533. {
  534. assert (false);
  535. return event_ = eof;
  536. }
  537. case XML_FINISHED:
  538. {
  539. return event_ = eof;
  540. }
  541. case XML_SUSPENDED:
  542. {
  543. switch (XML_ResumeParser (p_))
  544. {
  545. case XML_STATUS_SUSPENDED:
  546. {
  547. // If the parser is again in the suspended state, then
  548. // that means we have the next event.
  549. //
  550. return event_;
  551. }
  552. case XML_STATUS_OK:
  553. {
  554. // Otherwise, we need to get and parse the next chunk of data
  555. // unless this was the last chunk, in which case this is eof.
  556. //
  557. if (ps.finalBuffer)
  558. return event_ = eof;
  559. break;
  560. }
  561. case XML_STATUS_ERROR:
  562. handle_error ();
  563. }
  564. break;
  565. }
  566. }
  567. // Get and parse the next chunk of data until we get the next event
  568. // or reach eof.
  569. //
  570. if (!accumulate_)
  571. event_ = eof;
  572. XML_Status s;
  573. do
  574. {
  575. if (size_ != 0)
  576. {
  577. s = XML_Parse (p_,
  578. static_cast <const char*> (data_.buf),
  579. static_cast <int> (size_),
  580. true);
  581. if (s == XML_STATUS_ERROR)
  582. handle_error ();
  583. break;
  584. }
  585. else
  586. {
  587. const size_t cap (4096);
  588. char* b (static_cast<char*> (XML_GetBuffer (p_, cap)));
  589. if (b == 0)
  590. throw bad_alloc ();
  591. // Temporarily unset the exception failbit. Also clear the fail bit
  592. // when we reset the old state if it was caused by eof.
  593. //
  594. istream& is (*data_.is);
  595. {
  596. stream_exception_controller sec (is);
  597. is.read (b, static_cast<streamsize> (cap));
  598. }
  599. // If the caller hasn't configured the stream to use exceptions,
  600. // then use the parsing exception to report an error.
  601. //
  602. if (is.bad () || (is.fail () && !is.eof ()))
  603. throw parsing (*this, "io failure");
  604. bool eof (is.eof ());
  605. s = XML_ParseBuffer (p_, static_cast<int> (is.gcount ()), eof);
  606. if (s == XML_STATUS_ERROR)
  607. handle_error ();
  608. if (eof)
  609. break;
  610. }
  611. } while (s != XML_STATUS_SUSPENDED);
  612. return event_;
  613. }
  614. static void
  615. split_name (const XML_Char* s, qname& qn)
  616. {
  617. string& ns (qn.namespace_ ());
  618. string& name (qn.name ());
  619. string& prefix (qn.prefix ());
  620. const char* p (strchr (s, ' '));
  621. if (p == 0)
  622. {
  623. ns.clear ();
  624. name = s;
  625. prefix.clear ();
  626. }
  627. else
  628. {
  629. ns.assign (s, 0, p - s);
  630. s = p + 1;
  631. p = strchr (s, ' ');
  632. if (p == 0)
  633. {
  634. name = s;
  635. prefix.clear ();
  636. }
  637. else
  638. {
  639. name.assign (s, 0, p - s);
  640. prefix = p + 1;
  641. }
  642. }
  643. }
  644. void XMLCALL parser::
  645. start_element_ (void* v, const XML_Char* name, const XML_Char** atts)
  646. {
  647. parser& p (*static_cast<parser*> (v));
  648. XML_ParsingStatus ps;
  649. XML_GetParsingStatus (p.p_, &ps);
  650. // Expat has a (mis)-feature of a possibily calling handlers even
  651. // after the non-resumable XML_StopParser call.
  652. //
  653. if (ps.parsing == XML_FINISHED)
  654. return;
  655. // Cannot be a followup event.
  656. //
  657. assert (ps.parsing == XML_PARSING);
  658. // When accumulating characters in simple content, we expect to
  659. // see more characters or end element. Seeing start element is
  660. // possible but means violation of the content model.
  661. //
  662. if (p.accumulate_)
  663. {
  664. // It would have been easier to throw the exception directly,
  665. // however, the Expat code is most likely not exception safe.
  666. //
  667. p.line_ = XML_GetCurrentLineNumber (p.p_);
  668. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  669. XML_StopParser (p.p_, false);
  670. return;
  671. }
  672. p.event_ = start_element;
  673. split_name (name, p.qname_);
  674. p.line_ = XML_GetCurrentLineNumber (p.p_);
  675. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  676. // Handle attributes.
  677. //
  678. if (*atts != 0)
  679. {
  680. bool am ((p.feature_ & receive_attributes_map) != 0);
  681. bool ae ((p.feature_ & receive_attributes_event) != 0);
  682. // Provision an entry for this element.
  683. //
  684. element_entry* pe (0);
  685. if (am)
  686. {
  687. p.element_state_.push_back (element_entry (p.depth_ + 1));
  688. pe = &p.element_state_.back ();
  689. }
  690. if (am || ae)
  691. {
  692. for (; *atts != 0; atts += 2)
  693. {
  694. if (am)
  695. {
  696. qname_type qn;
  697. split_name (*atts, qn);
  698. attribute_map_type::value_type v (qn, attribute_value_type ());
  699. v.second.value = *(atts + 1);
  700. v.second.handled = false;
  701. pe->attr_map_.insert (v);
  702. }
  703. else
  704. {
  705. p.attr_.push_back (attribute_type ());
  706. split_name (*atts, p.attr_.back ().qname);
  707. p.attr_.back ().value = *(atts + 1);
  708. }
  709. }
  710. if (am)
  711. pe->attr_unhandled_ = pe->attr_map_.size ();
  712. }
  713. }
  714. XML_StopParser (p.p_, true);
  715. }
  716. void XMLCALL parser::
  717. end_element_ (void* v, const XML_Char* name)
  718. {
  719. parser& p (*static_cast<parser*> (v));
  720. XML_ParsingStatus ps;
  721. XML_GetParsingStatus (p.p_, &ps);
  722. // Expat has a (mis)-feature of a possibily calling handlers even
  723. // after the non-resumable XML_StopParser call.
  724. //
  725. if (ps.parsing == XML_FINISHED)
  726. return;
  727. // This can be a followup event for empty elements (<foo/>). In this
  728. // case the element name is already set.
  729. //
  730. if (ps.parsing != XML_PARSING)
  731. p.queue_ = end_element;
  732. else
  733. {
  734. split_name (name, p.qname_);
  735. // If we are accumulating characters, then queue this event.
  736. //
  737. if (p.accumulate_)
  738. p.queue_ = end_element;
  739. else
  740. {
  741. p.event_ = end_element;
  742. p.line_ = XML_GetCurrentLineNumber (p.p_);
  743. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  744. }
  745. XML_StopParser (p.p_, true);
  746. }
  747. }
  748. void XMLCALL parser::
  749. characters_ (void* v, const XML_Char* s, int n)
  750. {
  751. parser& p (*static_cast<parser*> (v));
  752. XML_ParsingStatus ps;
  753. XML_GetParsingStatus (p.p_, &ps);
  754. // Expat has a (mis)-feature of a possibily calling handlers even
  755. // after the non-resumable XML_StopParser call.
  756. //
  757. if (ps.parsing == XML_FINISHED)
  758. return;
  759. content_type cont (p.content ());
  760. // If this is empty or complex content, see if these are whitespaces.
  761. //
  762. switch (cont)
  763. {
  764. case content_type::empty:
  765. case content_type::complex:
  766. {
  767. for (int i (0); i != n; ++i)
  768. {
  769. char c (s[i]);
  770. if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09)
  771. continue;
  772. // It would have been easier to throw the exception directly,
  773. // however, the Expat code is most likely not exception safe.
  774. //
  775. p.line_ = XML_GetCurrentLineNumber (p.p_);
  776. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  777. XML_StopParser (p.p_, false);
  778. break;
  779. }
  780. return;
  781. }
  782. default:
  783. break;
  784. }
  785. // Append the characters if we are accumulating. This can also be a
  786. // followup event for another character event. In this case also
  787. // append the data.
  788. //
  789. if (p.accumulate_ || ps.parsing != XML_PARSING)
  790. {
  791. assert (p.event_ == characters);
  792. p.value_.append (s, n);
  793. }
  794. else
  795. {
  796. p.event_ = characters;
  797. p.value_.assign (s, n);
  798. p.line_ = XML_GetCurrentLineNumber (p.p_);
  799. p.column_ = XML_GetCurrentColumnNumber (p.p_);
  800. // In simple content we need to accumulate all the characters
  801. // into a single event. To do this we will let the parser run
  802. // until we reach the end of the element.
  803. //
  804. if (cont == content_type::simple)
  805. p.accumulate_ = true;
  806. else
  807. XML_StopParser (p.p_, true);
  808. }
  809. }
  810. void XMLCALL parser::
  811. start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns)
  812. {
  813. parser& p (*static_cast<parser*> (v));
  814. XML_ParsingStatus ps;
  815. XML_GetParsingStatus (p.p_, &ps);
  816. // Expat has a (mis)-feature of a possibily calling handlers even
  817. // after the non-resumable XML_StopParser call.
  818. //
  819. if (ps.parsing == XML_FINISHED)
  820. return;
  821. p.start_ns_.push_back (qname_type ());
  822. p.start_ns_.back ().prefix () = (prefix != 0 ? prefix : "");
  823. p.start_ns_.back ().namespace_ () = (ns != 0 ? ns : "");
  824. }
  825. void XMLCALL parser::
  826. end_namespace_decl_ (void* v, const XML_Char* prefix)
  827. {
  828. parser& p (*static_cast<parser*> (v));
  829. XML_ParsingStatus ps;
  830. XML_GetParsingStatus (p.p_, &ps);
  831. // Expat has a (mis)-feature of a possibily calling handlers even
  832. // after the non-resumable XML_StopParser call.
  833. //
  834. if (ps.parsing == XML_FINISHED)
  835. return;
  836. p.end_ns_.push_back (qname_type ());
  837. p.end_ns_.back ().prefix () = (prefix != 0 ? prefix : "");
  838. }
  839. }