xlsx_consumer.cpp 124 KB


  1. // Copyright (c) 2014-2021 Thomas Fussell
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to deal
  5. // in the Software without restriction, including without limitation the rights
  6. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. // copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. // THE SOFTWARE
  20. //
  21. // @license: http://www.opensource.org/licenses/mit-license.php
  22. // @author: see AUTHORS file
  23. #include <cassert>
  24. #include <cctype>
  25. #include <numeric> // for std::accumulate
  26. #include <sstream>
  27. #include <unordered_map>
  28. #include <xlnt/cell/cell.hpp>
  29. #include <xlnt/cell/comment.hpp>
  30. #include <xlnt/cell/hyperlink.hpp>
  31. #include <xlnt/drawing/spreadsheet_drawing.hpp>
  32. #include <xlnt/packaging/manifest.hpp>
  33. #include <xlnt/utils/optional.hpp>
  34. #include <xlnt/utils/path.hpp>
  35. #include <xlnt/workbook/workbook.hpp>
  36. #include <xlnt/worksheet/selection.hpp>
  37. #include <xlnt/worksheet/worksheet.hpp>
  38. #include <detail/constants.hpp>
  39. #include <detail/header_footer/header_footer_code.hpp>
  40. #include <detail/implementations/workbook_impl.hpp>
  41. #include <detail/serialization/custom_value_traits.hpp>
  42. #include <detail/serialization/defined_name.hpp>
  43. #include <detail/serialization/serialisation_helpers.hpp>
  44. #include <detail/serialization/vector_streambuf.hpp>
  45. #include <detail/serialization/xlsx_consumer.hpp>
  46. #include <detail/serialization/zstream.hpp>
  47. namespace {
  /// string_arr_loop_equal
  /// character-by-character comparison of lhs against the string literal rhs
  /// compares exactly N - 1 characters (N includes the literal's terminating '\0')
  /// and does not look at lhs.size(); string_equal performs that check first
  template <size_t N>
  inline bool string_arr_loop_equal(const std::string &lhs, const char (&rhs)[N])
  {
      size_t pos = 0;
      // advance while the characters agree, stopping at the first difference
      while (pos < N - 1 && lhs[pos] == rhs[pos])
      {
          ++pos;
      }
      // equal only if every one of the N - 1 characters matched
      return pos == N - 1;
  }
  /// string_equal
  /// for comparison between std::string and string literals
  /// the literal's length (N - 1) is known at compile time, so the size check
  /// is a plain integer comparison that runs before any character is read
  template <size_t N>
  inline bool string_equal(const std::string &lhs, const char (&rhs)[N])
  {
      // the character loop is kept in its own function to assist with inlining of the size check
      return lhs.size() == N - 1 && string_arr_loop_equal(lhs, rhs);
  }
  73. xml::qname &qn(const std::string &namespace_, const std::string &name)
  74. {
  75. using qname_map = std::unordered_map<std::string, xml::qname>;
  76. static auto memo = std::unordered_map<std::string, qname_map>();
  77. auto &ns_memo = memo[namespace_];
  78. if (ns_memo.find(name) == ns_memo.end())
  79. {
  80. return ns_memo.emplace(name, xml::qname(xlnt::constants::ns(namespace_), name)).first->second;
  81. }
  82. return ns_memo[name];
  83. }
  /// <summary>
  /// Returns true if bool_string is "1" or "true" (a true xsd:boolean).
  /// Any other text yields false, unless THROW_ON_INVALID_XML is defined, in which
  /// case text other than "0", "1", "true" or "false" raises xlnt::exception.
  /// </summary>
  bool is_true(const std::string &bool_string)
  {
      const bool truthy = bool_string == "1" || bool_string == "true";

  #ifdef THROW_ON_INVALID_XML
      const bool falsy = bool_string == "0" || bool_string == "false";

      if (!truthy && !falsy)
      {
          throw xlnt::exception("xsd:boolean should be one of: 0, 1, true, or false, found " + bool_string);
      }
  #endif

      return truthy;
  }
  103. using style_id_pair = std::pair<xlnt::detail::style_impl, std::size_t>;
  104. /// <summary>
  105. /// Try to find given xfid value in the styles vector and, if succeeded, set's the optional style.
  106. /// </summary>
  107. void set_style_by_xfid(const std::vector<style_id_pair> &styles,
  108. std::size_t xfid, xlnt::optional<std::string> &style)
  109. {
  110. for (auto &item : styles)
  111. {
  112. if (item.second == xfid)
  113. {
  114. style = item.first.name;
  115. }
  116. }
  117. }
  118. // <sheetData> element
  119. struct Sheet_Data
  120. {
  121. std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
  122. std::vector<xlnt::detail::Cell> parsed_cells;
  123. };
  124. xlnt::cell_type type_from_string(const std::string &str)
  125. {
  126. if (string_equal(str, "s"))
  127. {
  128. return xlnt::cell::type::shared_string;
  129. }
  130. else if (string_equal(str, "n"))
  131. {
  132. return xlnt::cell::type::number;
  133. }
  134. else if (string_equal(str, "b"))
  135. {
  136. return xlnt::cell::type::boolean;
  137. }
  138. else if (string_equal(str, "e"))
  139. {
  140. return xlnt::cell::type::error;
  141. }
  142. else if (string_equal(str, "inlineStr"))
  143. {
  144. return xlnt::cell::type::inline_string;
  145. }
  146. else if (string_equal(str, "str"))
  147. {
  148. return xlnt::cell::type::formula_string;
  149. }
  150. return xlnt::cell::type::shared_string;
  151. }
  152. xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser, std::unordered_map<std::string, std::string> &array_formulae, std::unordered_map<int, std::string> &shared_formulae)
  153. {
  154. xlnt::detail::Cell c;
  155. for (auto &attr : parser->attribute_map())
  156. {
  157. if (string_equal(attr.first.name(), "r"))
  158. {
  159. c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
  160. }
  161. else if (string_equal(attr.first.name(), "t"))
  162. {
  163. c.type = type_from_string(attr.second.value);
  164. }
  165. else if (string_equal(attr.first.name(), "s"))
  166. {
  167. c.style_index = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
  168. }
  169. else if (string_equal(attr.first.name(), "ph"))
  170. {
  171. c.is_phonetic = is_true(attr.second.value);
  172. }
  173. else if (string_equal(attr.first.name(), "cm"))
  174. {
  175. c.cell_metatdata_idx = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
  176. }
  177. }
  178. int level = 1; // nesting level
  179. // 1 == <c>
  180. // 2 == <v>/<f>
  181. // 3 == <is><t>
  182. // exit loop at </c>
  183. while (level > 0)
  184. {
  185. xml::parser::event_type e = parser->next();
  186. switch (e)
  187. {
  188. case xml::parser::start_element: {
  189. if (string_equal(parser->name(), "f") && parser->attribute_present("t"))
  190. {
  191. // Skip shared formulas with a ref attribute because it indicates that this
  192. // is the master cell which will be handled in the xml::parser::characters case.
  193. if (parser->attribute("t") == "shared" && !parser->attribute_present("ref"))
  194. {
  195. auto shared_index = parser->attribute<int>("si");
  196. c.formula_string = shared_formulae[shared_index];
  197. }
  198. }
  199. ++level;
  200. break;
  201. }
  202. case xml::parser::end_element: {
  203. --level;
  204. break;
  205. }
  206. case xml::parser::characters: {
  207. // only want the characters inside one of the nested tags
  208. // without this a lot of formatting whitespace can get added
  209. if (level == 2)
  210. {
  211. // <v> -> numeric values
  212. if (string_equal(parser->name(), "v"))
  213. {
  214. c.value += std::move(parser->value());
  215. }
  216. // <f> formula
  217. else if (string_equal(parser->name(), "f"))
  218. {
  219. c.formula_string += std::move(parser->value());
  220. if (parser->attribute_present("t"))
  221. {
  222. auto formula_ref = parser->attribute("ref");
  223. auto formula_type = parser->attribute("t");
  224. if (formula_type == "shared")
  225. {
  226. auto shared_index = parser->attribute<int>("si");
  227. shared_formulae[shared_index] = c.formula_string;
  228. }
  229. else if (formula_type == "array")
  230. {
  231. array_formulae[formula_ref] = c.formula_string;
  232. }
  233. }
  234. }
  235. }
  236. else if (level == 3)
  237. {
  238. // <is><t> -> inline string
  239. if (string_equal(parser->name(), "t"))
  240. {
  241. c.value += std::move(parser->value());
  242. }
  243. }
  244. break;
  245. }
  246. case xml::parser::start_namespace_decl:
  247. case xml::parser::end_namespace_decl:
  248. case xml::parser::start_attribute:
  249. case xml::parser::end_attribute:
  250. case xml::parser::eof:
  251. default: {
  252. throw xlnt::exception("unexcpected XML parsing event");
  253. }
  254. }
  255. // Prevents unhandled exceptions from being triggered.
  256. parser->attribute_map();
  257. }
  258. return c;
  259. }
  260. // <row> inside <sheetData> element
  261. std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells, std::unordered_map<std::string, std::string> &array_formulae, std::unordered_map<int, std::string> &shared_formulae)
  262. {
  263. std::pair<xlnt::row_properties, int> props;
  264. for (auto &attr : parser->attribute_map())
  265. {
  266. if (string_equal(attr.first.name(), "dyDescent"))
  267. {
  268. props.first.dy_descent = converter.deserialise(attr.second.value);
  269. }
  270. else if (string_equal(attr.first.name(), "spans"))
  271. {
  272. props.first.spans = attr.second.value;
  273. }
  274. else if (string_equal(attr.first.name(), "ht"))
  275. {
  276. props.first.height = converter.deserialise(attr.second.value);
  277. }
  278. else if (string_equal(attr.first.name(), "s"))
  279. {
  280. props.first.style = strtoul(attr.second.value.c_str(), nullptr, 10);
  281. }
  282. else if (string_equal(attr.first.name(), "hidden"))
  283. {
  284. props.first.hidden = is_true(attr.second.value);
  285. }
  286. else if (string_equal(attr.first.name(), "customFormat"))
  287. {
  288. props.first.custom_format = is_true(attr.second.value);
  289. }
  290. else if (string_equal(attr.first.name(), "ph"))
  291. {
  292. is_true(attr.second.value);
  293. }
  294. else if (string_equal(attr.first.name(), "r"))
  295. {
  296. props.second = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
  297. }
  298. else if (string_equal(attr.first.name(), "customHeight"))
  299. {
  300. props.first.custom_height = is_true(attr.second.value.c_str());
  301. }
  302. }
  303. int level = 1;
  304. while (level > 0)
  305. {
  306. xml::parser::event_type e = parser->next();
  307. switch (e)
  308. {
  309. case xml::parser::start_element: {
  310. parsed_cells.push_back(parse_cell(static_cast<xlnt::row_t>(props.second), parser, array_formulae, shared_formulae));
  311. break;
  312. }
  313. case xml::parser::end_element: {
  314. --level;
  315. break;
  316. }
  317. case xml::parser::characters: {
  318. // ignore whitespace
  319. break;
  320. }
  321. case xml::parser::start_namespace_decl:
  322. case xml::parser::start_attribute:
  323. case xml::parser::end_namespace_decl:
  324. case xml::parser::end_attribute:
  325. case xml::parser::eof:
  326. default: {
  327. throw xlnt::exception("unexcpected XML parsing event");
  328. }
  329. }
  330. }
  331. return props;
  332. }
  333. // <sheetData> inside <worksheet> element
  334. Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::unordered_map<std::string, std::string> &array_formulae, std::unordered_map<int, std::string> &shared_formulae)
  335. {
  336. Sheet_Data sheet_data;
  337. int level = 1; // nesting level
  338. // 1 == <sheetData>
  339. // 2 == <row>
  340. while (level > 0)
  341. {
  342. xml::parser::event_type e = parser->next();
  343. switch (e)
  344. {
  345. case xml::parser::start_element: {
  346. sheet_data.parsed_rows.push_back(parse_row(parser, converter, sheet_data.parsed_cells, array_formulae, shared_formulae));
  347. break;
  348. }
  349. case xml::parser::end_element: {
  350. --level;
  351. break;
  352. }
  353. case xml::parser::characters: {
  354. // ignore, whitespace formatting normally
  355. break;
  356. }
  357. case xml::parser::start_namespace_decl:
  358. case xml::parser::start_attribute:
  359. case xml::parser::end_namespace_decl:
  360. case xml::parser::end_attribute:
  361. case xml::parser::eof:
  362. default: {
  363. throw xlnt::exception("unexcpected XML parsing event");
  364. }
  365. }
  366. }
  367. return sheet_data;
  368. }
  369. } // namespace
  370. /*
  371. class parsing_context
  372. {
  373. public:
  374. parsing_context(xlnt::detail::zip_file_reader &archive, const std::string &filename)
  375. : parser_(stream_, filename)
  376. {
  377. }
  378. xml::parser &parser();
  379. private:
  380. std::istream stream_;
  381. xml::parser parser_;
  382. };
  383. */
  384. namespace xlnt {
  385. namespace detail {
  386. xlsx_consumer::xlsx_consumer(workbook &target)
  387. : target_(target),
  388. parser_(nullptr)
  389. {
  390. }
  391. xlsx_consumer::~xlsx_consumer()
  392. {
  393. }
  394. void xlsx_consumer::read(std::istream &source)
  395. {
  396. archive_.reset(new izstream(source));
  397. populate_workbook(false);
  398. }
  399. void xlsx_consumer::open(std::istream &source)
  400. {
  401. archive_.reset(new izstream(source));
  402. populate_workbook(true);
  403. }
  404. cell xlsx_consumer::read_cell()
  405. {
  406. return cell(streaming_cell_.get());
  407. }
  408. void xlsx_consumer::read_worksheet(const std::string &rel_id)
  409. {
  410. read_worksheet_begin(rel_id);
  411. if (!streaming_)
  412. {
  413. read_worksheet_sheetdata();
  414. read_worksheet_end(rel_id);
  415. }
  416. }
  417. void read_defined_names(worksheet ws, std::vector<defined_name> defined_names)
  418. {
  419. for (auto &name : defined_names)
  420. {
  421. if (name.sheet_id != ws.id() - 1)
  422. {
  423. continue;
  424. }
  425. if (name.name == "_xlnm.Print_Titles")
  426. {
  427. // Basic print titles parser
  428. // A print title definition looks like "'Sheet3'!$B:$E,'Sheet3'!$2:$4"
  429. // There are three cases: columns only, rows only, and both (separated by a comma).
  430. // For this reason, we loop up to two times parsing each component.
  431. // Titles may be quoted (with single quotes) or unquoted. We ignore them for now anyways.
  432. // References are always absolute.
  433. // Move this into a separate function if it needs to be used in other places.
  434. auto i = std::size_t(0);
  435. for (auto count = 0; count < 2; count++)
  436. {
  437. // Split into components based on "!", ":", and "," characters
  438. auto j = i;
  439. i = name.value.find("!", j);
  440. auto title = name.value.substr(j, i - j);
  441. j = i + 2; // skip "!$"
  442. i = name.value.find(":", j);
  443. auto from = name.value.substr(j, i - j);
  444. j = i + 2; // skip ":$"
  445. i = name.value.find(",", j);
  446. auto to = name.value.substr(j, i - j);
  447. // Apply to the worksheet
  448. if (isalpha(from.front())) // alpha=>columns
  449. {
  450. ws.print_title_cols(from, to);
  451. }
  452. else // numeric=>rows
  453. {
  454. ws.print_title_rows(std::stoul(from), std::stoul(to));
  455. }
  456. // Check for end condition
  457. if (i == std::string::npos)
  458. {
  459. break;
  460. }
  461. i++; // skip "," for next iteration
  462. }
  463. }
  464. else if (name.name == "_xlnm._FilterDatabase")
  465. {
  466. auto i = name.value.find("!");
  467. ws.auto_filter(name.value.substr(i + 1));
  468. }
  469. else if (name.name == "_xlnm.Print_Area")
  470. {
  471. auto i = name.value.find("!");
  472. ws.print_area(name.value.substr(i + 1));
  473. }
  474. }
  475. }
  476. std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
  477. {
  478. if (streaming_ && streaming_cell_ == nullptr)
  479. {
  480. streaming_cell_.reset(new detail::cell_impl());
  481. }
  482. array_formulae_.clear();
  483. shared_formulae_.clear();
  484. auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
  485. target_.d_->sheet_title_rel_id_map_.end(),
  486. [&](const std::pair<std::string, std::string> &p) {
  487. return p.second == rel_id;
  488. })->first;
  489. auto ws = worksheet(current_worksheet_);
  490. expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
  491. skip_attributes({qn("mc", "Ignorable")});
  492. read_defined_names(ws, defined_names_);
  493. while (in_element(qn("spreadsheetml", "worksheet")))
  494. {
  495. auto current_worksheet_element = expect_start_element(xml::content::complex);
  496. if (current_worksheet_element == qn("spreadsheetml", "sheetPr")) // CT_SheetPr 0-1
  497. {
  498. sheet_pr props;
  499. if (parser().attribute_present("syncHorizontal"))
  500. { // optional, boolean, false
  501. props.sync_horizontal.set(parser().attribute<bool>("syncHorizontal"));
  502. }
  503. if (parser().attribute_present("syncVertical"))
  504. { // optional, boolean, false
  505. props.sync_vertical.set(parser().attribute<bool>("syncVertical"));
  506. }
  507. if (parser().attribute_present("syncRef"))
  508. { // optional, ST_Ref, false
  509. props.sync_ref.set(cell_reference(parser().attribute("syncRef")));
  510. }
  511. if (parser().attribute_present("transitionEvaluation"))
  512. { // optional, boolean, false
  513. props.transition_evaluation.set(parser().attribute<bool>("transitionEvaluation"));
  514. }
  515. if (parser().attribute_present("transitionEntry"))
  516. { // optional, boolean, false
  517. props.transition_entry.set(parser().attribute<bool>("transitionEntry"));
  518. }
  519. if (parser().attribute_present("published"))
  520. { // optional, boolean, true
  521. props.published.set(parser().attribute<bool>("published"));
  522. }
  523. if (parser().attribute_present("codeName"))
  524. { // optional, string
  525. props.code_name.set(parser().attribute<std::string>("codeName"));
  526. }
  527. if (parser().attribute_present("filterMode"))
  528. { // optional, boolean, false
  529. props.filter_mode.set(parser().attribute<bool>("filterMode"));
  530. }
  531. if (parser().attribute_present("enableFormatConditionsCalculation"))
  532. { // optional, boolean, true
  533. props.enable_format_condition_calculation.set(parser().attribute<bool>("enableFormatConditionsCalculation"));
  534. }
  535. ws.d_->sheet_properties_.set(props);
  536. while (in_element(current_worksheet_element))
  537. {
  538. auto sheet_pr_child_element = expect_start_element(xml::content::simple);
  539. if (sheet_pr_child_element == qn("spreadsheetml", "tabColor")) // CT_Color 0-1
  540. {
  541. read_color();
  542. }
  543. else if (sheet_pr_child_element == qn("spreadsheetml", "outlinePr")) // CT_OutlinePr 0-1
  544. {
  545. skip_attribute("applyStyles"); // optional, boolean, false
  546. skip_attribute("summaryBelow"); // optional, boolean, true
  547. skip_attribute("summaryRight"); // optional, boolean, true
  548. skip_attribute("showOutlineSymbols"); // optional, boolean, true
  549. }
  550. else if (sheet_pr_child_element == qn("spreadsheetml", "pageSetUpPr")) // CT_PageSetUpPr 0-1
  551. {
  552. skip_attribute("autoPageBreaks"); // optional, boolean, true
  553. skip_attribute("fitToPage"); // optional, boolean, false
  554. }
  555. else
  556. {
  557. unexpected_element(sheet_pr_child_element);
  558. }
  559. expect_end_element(sheet_pr_child_element);
  560. }
  561. }
  562. else if (current_worksheet_element == qn("spreadsheetml", "dimension")) // CT_SheetDimension 0-1
  563. {
  564. skip_remaining_content(current_worksheet_element);
  565. }
  566. else if (current_worksheet_element == qn("spreadsheetml", "sheetViews")) // CT_SheetViews 0-1
  567. {
  568. while (in_element(current_worksheet_element))
  569. {
  570. expect_start_element(qn("spreadsheetml", "sheetView"), xml::content::complex); // CT_SheetView 1+
  571. sheet_view new_view;
  572. new_view.id(parser().attribute<std::size_t>("workbookViewId"));
  573. if (parser().attribute_present("showGridLines")) // default="true"
  574. {
  575. new_view.show_grid_lines(is_true(parser().attribute("showGridLines")));
  576. }
  577. if (parser().attribute_present("topLeftCell"))
  578. {
  579. new_view.top_left_cell(cell_reference(parser().attribute("topLeftCell")));
  580. }
  581. if (parser().attribute_present("defaultGridColor")) // default="true"
  582. {
  583. new_view.default_grid_color(is_true(parser().attribute("defaultGridColor")));
  584. }
  585. if (parser().attribute_present("view")
  586. && parser().attribute("view") != "normal")
  587. {
  588. new_view.type(parser().attribute("view") == "pageBreakPreview"
  589. ? sheet_view_type::page_break_preview
  590. : sheet_view_type::page_layout);
  591. }
  592. if (parser().attribute_present("tabSelected")
  593. && is_true(parser().attribute("tabSelected")))
  594. {
  595. target_.d_->view_.get().active_tab = ws.id() - 1;
  596. }
  597. skip_attributes({"windowProtection", "showFormulas", "showRowColHeaders", "showZeros", "rightToLeft", "showRuler", "showOutlineSymbols", "showWhiteSpace",
  598. "view", "topLeftCell", "colorId", "zoomScale", "zoomScaleNormal", "zoomScaleSheetLayoutView",
  599. "zoomScalePageLayoutView"});
  600. while (in_element(qn("spreadsheetml", "sheetView")))
  601. {
  602. auto sheet_view_child_element = expect_start_element(xml::content::simple);
  603. if (sheet_view_child_element == qn("spreadsheetml", "pane")) // CT_Pane 0-1
  604. {
  605. pane new_pane;
  606. if (parser().attribute_present("topLeftCell"))
  607. {
  608. new_pane.top_left_cell = cell_reference(parser().attribute("topLeftCell"));
  609. }
  610. if (parser().attribute_present("xSplit"))
  611. {
  612. new_pane.x_split = parser().attribute<column_t::index_t>("xSplit");
  613. }
  614. if (parser().attribute_present("ySplit"))
  615. {
  616. new_pane.y_split = parser().attribute<row_t>("ySplit");
  617. }
  618. if (parser().attribute_present("activePane"))
  619. {
  620. new_pane.active_pane = parser().attribute<pane_corner>("activePane");
  621. }
  622. if (parser().attribute_present("state"))
  623. {
  624. new_pane.state = parser().attribute<pane_state>("state");
  625. }
  626. new_view.pane(new_pane);
  627. }
  628. else if (sheet_view_child_element == qn("spreadsheetml", "selection")) // CT_Selection 0-4
  629. {
  630. selection current_selection;
  631. if (parser().attribute_present("activeCell"))
  632. {
  633. current_selection.active_cell(parser().attribute("activeCell"));
  634. }
  635. if (parser().attribute_present("sqref"))
  636. {
  637. const auto sqref = range_reference(parser().attribute("sqref"));
  638. current_selection.sqref(sqref);
  639. }
  640. if (parser().attribute_present("pane"))
  641. {
  642. current_selection.pane(parser().attribute<pane_corner>("pane"));
  643. }
  644. new_view.add_selection(current_selection);
  645. skip_remaining_content(sheet_view_child_element);
  646. }
  647. else if (sheet_view_child_element == qn("spreadsheetml", "pivotSelection")) // CT_PivotSelection 0-4
  648. {
  649. skip_remaining_content(sheet_view_child_element);
  650. }
  651. else if (sheet_view_child_element == qn("spreadsheetml", "extLst")) // CT_ExtensionList 0-1
  652. {
  653. skip_remaining_content(sheet_view_child_element);
  654. }
  655. else
  656. {
  657. unexpected_element(sheet_view_child_element);
  658. }
  659. expect_end_element(sheet_view_child_element);
  660. }
  661. expect_end_element(qn("spreadsheetml", "sheetView"));
  662. ws.d_->views_.push_back(new_view);
  663. }
  664. }
  665. else if (current_worksheet_element == qn("spreadsheetml", "sheetFormatPr")) // CT_SheetFormatPr 0-1
  666. {
  667. if (parser().attribute_present("baseColWidth"))
  668. {
  669. ws.d_->format_properties_.base_col_width =
  670. converter_.deserialise(parser().attribute("baseColWidth"));
  671. }
  672. if (parser().attribute_present("defaultColWidth"))
  673. {
  674. ws.d_->format_properties_.default_column_width =
  675. converter_.deserialise(parser().attribute("defaultColWidth"));
  676. }
  677. if (parser().attribute_present("defaultRowHeight"))
  678. {
  679. ws.d_->format_properties_.default_row_height =
  680. converter_.deserialise(parser().attribute("defaultRowHeight"));
  681. }
  682. if (parser().attribute_present(qn("x14ac", "dyDescent")))
  683. {
  684. ws.d_->format_properties_.dy_descent =
  685. converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
  686. }
  687. skip_attributes();
  688. }
  689. else if (current_worksheet_element == qn("spreadsheetml", "cols")) // CT_Cols 0+
  690. {
  691. while (in_element(qn("spreadsheetml", "cols")))
  692. {
  693. expect_start_element(qn("spreadsheetml", "col"), xml::content::simple);
  694. skip_attributes(std::vector<std::string>{"collapsed", "outlineLevel"});
  695. auto min = static_cast<column_t::index_t>(std::stoull(parser().attribute("min")));
  696. auto max = static_cast<column_t::index_t>(std::stoull(parser().attribute("max")));
  697. // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
  698. optional<double> width = [this](xml::parser &p) -> xlnt::optional<double> {
  699. if (p.attribute_present("width"))
  700. {
  701. return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7;
  702. }
  703. return xlnt::optional<double>();
  704. }(parser());
  705. // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
  706. optional<std::size_t> column_style = [](xml::parser &p) -> xlnt::optional<std::size_t> {
  707. if (p.attribute_present("style"))
  708. {
  709. return p.attribute<std::size_t>("style");
  710. }
  711. return xlnt::optional<std::size_t>();
  712. }(parser());
  713. auto custom = parser().attribute_present("customWidth")
  714. ? is_true(parser().attribute("customWidth"))
  715. : false;
  716. auto hidden = parser().attribute_present("hidden")
  717. ? is_true(parser().attribute("hidden"))
  718. : false;
  719. auto best_fit = parser().attribute_present("bestFit")
  720. ? is_true(parser().attribute("bestFit"))
  721. : false;
  722. expect_end_element(qn("spreadsheetml", "col"));
  723. for (auto column = min; column <= max; column++)
  724. {
  725. column_properties props;
  726. if (width.is_set())
  727. {
  728. props.width = width.get();
  729. }
  730. if (column_style.is_set())
  731. {
  732. props.style = column_style.get();
  733. }
  734. props.hidden = hidden;
  735. props.custom_width = custom;
  736. props.best_fit = best_fit;
  737. ws.add_column_properties(column, props);
  738. }
  739. }
  740. }
  741. else if (current_worksheet_element == qn("spreadsheetml", "sheetData")) // CT_SheetData 1
  742. {
  743. return title;
  744. }
  745. expect_end_element(current_worksheet_element);
  746. }
  747. return title;
  748. }
  749. void xlsx_consumer::read_worksheet_sheetdata()
  750. {
  751. if (stack_.back() != qn("spreadsheetml", "sheetData"))
  752. {
  753. return;
  754. }
  755. auto ws_data = parse_sheet_data(parser_, converter_, array_formulae_, shared_formulae_);
  756. // NOTE: parse->construct are seperated here and could easily be threaded
  757. // with a SPSC queue for what is likely to be an easy performance win
  758. for (auto &row : ws_data.parsed_rows)
  759. {
  760. current_worksheet_->row_properties_.emplace(row.second, std::move(row.first));
  761. }
  762. auto impl = detail::cell_impl();
  763. for (Cell &cell : ws_data.parsed_cells)
  764. {
  765. impl.parent_ = current_worksheet_;
  766. impl.column_ = cell.ref.column;
  767. impl.row_ = cell.ref.row;
  768. detail::cell_impl *ws_cell_impl = &current_worksheet_->cell_map_.emplace(cell_reference(impl.column_, impl.row_), std::move(impl)).first->second;
  769. if (cell.style_index != -1)
  770. {
  771. ws_cell_impl->format_ = target_.format(static_cast<size_t>(cell.style_index)).d_;
  772. }
  773. if (cell.cell_metatdata_idx != -1)
  774. {
  775. }
  776. ws_cell_impl->phonetics_visible_ = cell.is_phonetic;
  777. if (!cell.formula_string.empty())
  778. {
  779. ws_cell_impl->formula_ = cell.formula_string[0] == '=' ? cell.formula_string.substr(1) : std::move(cell.formula_string);
  780. }
  781. if (!cell.value.empty())
  782. {
  783. ws_cell_impl->type_ = cell.type;
  784. switch (cell.type)
  785. {
  786. case cell::type::boolean: {
  787. ws_cell_impl->value_numeric_ = is_true(cell.value) ? 1.0 : 0.0;
  788. break;
  789. }
  790. case cell::type::empty:
  791. case cell::type::number:
  792. case cell::type::date: {
  793. ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value);
  794. break;
  795. }
  796. case cell::type::shared_string: {
  797. ws_cell_impl->value_numeric_ = static_cast<double>(strtol(cell.value.c_str(), nullptr, 10));
  798. break;
  799. }
  800. case cell::type::inline_string: {
  801. ws_cell_impl->value_text_ = std::move(cell.value);
  802. break;
  803. }
  804. case cell::type::formula_string: {
  805. ws_cell_impl->value_text_ = std::move(cell.value);
  806. break;
  807. }
  808. case cell::type::error: {
  809. ws_cell_impl->value_text_.plain_text(cell.value, false);
  810. break;
  811. }
  812. }
  813. }
  814. }
  815. stack_.pop_back();
  816. }
  817. worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
  818. {
  819. auto &manifest = target_.manifest();
  820. const auto workbook_rel = manifest.relationship(path("/"), relationship_type::office_document);
  821. const auto sheet_rel = manifest.relationship(workbook_rel.target().path(), rel_id);
  822. path sheet_path(sheet_rel.source().path().parent().append(sheet_rel.target().path()));
  823. auto hyperlinks = manifest.relationships(sheet_path, xlnt::relationship_type::hyperlink);
  824. auto ws = worksheet(current_worksheet_);
  825. while (in_element(qn("spreadsheetml", "worksheet")))
  826. {
  827. auto current_worksheet_element = expect_start_element(xml::content::complex);
  828. if (current_worksheet_element == qn("spreadsheetml", "sheetCalcPr")) // CT_SheetCalcPr 0-1
  829. {
  830. skip_remaining_content(current_worksheet_element);
  831. }
  832. else if (current_worksheet_element == qn("spreadsheetml", "sheetProtection")) // CT_SheetProtection 0-1
  833. {
  834. skip_remaining_content(current_worksheet_element);
  835. }
  836. else if (current_worksheet_element == qn("spreadsheetml", "protectedRanges")) // CT_ProtectedRanges 0-1
  837. {
  838. skip_remaining_content(current_worksheet_element);
  839. }
  840. else if (current_worksheet_element == qn("spreadsheetml", "scenarios")) // CT_Scenarios 0-1
  841. {
  842. skip_remaining_content(current_worksheet_element);
  843. }
  844. else if (current_worksheet_element == qn("spreadsheetml", "autoFilter")) // CT_AutoFilter 0-1
  845. {
  846. ws.auto_filter(xlnt::range_reference(parser().attribute("ref")));
  847. // auto filter complex
  848. skip_remaining_content(current_worksheet_element);
  849. }
  850. else if (current_worksheet_element == qn("spreadsheetml", "sortState")) // CT_SortState 0-1
  851. {
  852. skip_remaining_content(current_worksheet_element);
  853. }
  854. else if (current_worksheet_element == qn("spreadsheetml", "dataConsolidate")) // CT_DataConsolidate 0-1
  855. {
  856. skip_remaining_content(current_worksheet_element);
  857. }
  858. else if (current_worksheet_element == qn("spreadsheetml", "customSheetViews")) // CT_CustomSheetViews 0-1
  859. {
  860. skip_remaining_content(current_worksheet_element);
  861. }
  862. else if (current_worksheet_element == qn("spreadsheetml", "mergeCells")) // CT_MergeCells 0-1
  863. {
  864. parser().attribute_map();
  865. while (in_element(qn("spreadsheetml", "mergeCells")))
  866. {
  867. expect_start_element(qn("spreadsheetml", "mergeCell"), xml::content::simple);
  868. ws.merge_cells(range_reference(parser().attribute("ref")));
  869. expect_end_element(qn("spreadsheetml", "mergeCell"));
  870. }
  871. }
  872. else if (current_worksheet_element == qn("spreadsheetml", "phoneticPr")) // CT_PhoneticPr 0-1
  873. {
  874. phonetic_pr phonetic_properties(parser().attribute<std::uint32_t>("fontId"));
  875. if (parser().attribute_present("type"))
  876. {
  877. phonetic_properties.type(phonetic_pr::type_from_string(parser().attribute("type")));
  878. }
  879. if (parser().attribute_present("alignment"))
  880. {
  881. phonetic_properties.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
  882. }
  883. current_worksheet_->phonetic_properties_.set(phonetic_properties);
  884. }
  885. else if (current_worksheet_element == qn("spreadsheetml", "conditionalFormatting")) // CT_ConditionalFormatting 0+
  886. {
  887. skip_remaining_content(current_worksheet_element);
  888. }
  889. else if (current_worksheet_element == qn("spreadsheetml", "dataValidations")) // CT_DataValidations 0-1
  890. {
  891. skip_remaining_content(current_worksheet_element);
  892. }
  893. else if (current_worksheet_element == qn("spreadsheetml", "hyperlinks")) // CT_Hyperlinks 0-1
  894. {
  895. while (in_element(current_worksheet_element))
  896. {
  897. // CT_Hyperlink
  898. expect_start_element(qn("spreadsheetml", "hyperlink"), xml::content::simple);
  899. auto cell = ws.cell(parser().attribute("ref"));
  900. if (parser().attribute_present(qn("r", "id")))
  901. {
  902. auto hyperlink_rel_id = parser().attribute(qn("r", "id"));
  903. auto hyperlink_rel = std::find_if(hyperlinks.begin(), hyperlinks.end(),
  904. [&](const relationship &r) { return r.id() == hyperlink_rel_id; });
  905. if (hyperlink_rel != hyperlinks.end())
  906. {
  907. auto url = hyperlink_rel->target().path().string();
  908. if (cell.has_value())
  909. {
  910. cell.hyperlink(url, cell.value<std::string>());
  911. }
  912. else
  913. {
  914. cell.hyperlink(url);
  915. }
  916. }
  917. }
  918. else if (parser().attribute_present("location"))
  919. {
  920. auto hyperlink = hyperlink_impl();
  921. auto location = parser().attribute("location");
  922. hyperlink.relationship = relationship("", relationship_type::hyperlink,
  923. uri(""), uri(location), target_mode::internal);
  924. if (parser().attribute_present("display"))
  925. {
  926. hyperlink.display = parser().attribute("display");
  927. }
  928. if (parser().attribute_present("tooltip"))
  929. {
  930. hyperlink.tooltip = parser().attribute("tooltip");
  931. }
  932. cell.d_->hyperlink_ = hyperlink;
  933. }
  934. expect_end_element(qn("spreadsheetml", "hyperlink"));
  935. }
  936. }
  937. else if (current_worksheet_element == qn("spreadsheetml", "printOptions")) // CT_PrintOptions 0-1
  938. {
  939. print_options opts;
  940. if (parser().attribute_present("gridLines"))
  941. {
  942. opts.print_grid_lines.set(parser().attribute<bool>("gridLines"));
  943. }
  944. if (parser().attribute_present("gridLinesSet"))
  945. {
  946. opts.grid_lines_set.set(parser().attribute<bool>("gridLinesSet"));
  947. }
  948. if (parser().attribute_present("headings"))
  949. {
  950. opts.print_headings.set(parser().attribute<bool>("headings"));
  951. }
  952. if (parser().attribute_present("horizontalCentered"))
  953. {
  954. opts.horizontal_centered.set(parser().attribute<bool>("horizontalCentered"));
  955. }
  956. if (parser().attribute_present("verticalCentered"))
  957. {
  958. opts.vertical_centered.set(parser().attribute<bool>("verticalCentered"));
  959. }
  960. ws.d_->print_options_.set(opts);
  961. skip_remaining_content(current_worksheet_element);
  962. }
  963. else if (current_worksheet_element == qn("spreadsheetml", "pageMargins")) // CT_PageMargins 0-1
  964. {
  965. page_margins margins;
  966. margins.top(converter_.deserialise(parser().attribute("top")));
  967. margins.bottom(converter_.deserialise(parser().attribute("bottom")));
  968. margins.left(converter_.deserialise(parser().attribute("left")));
  969. margins.right(converter_.deserialise(parser().attribute("right")));
  970. margins.header(converter_.deserialise(parser().attribute("header")));
  971. margins.footer(converter_.deserialise(parser().attribute("footer")));
  972. ws.page_margins(margins);
  973. }
  974. else if (current_worksheet_element == qn("spreadsheetml", "pageSetup")) // CT_PageSetup 0-1
  975. {
  976. page_setup setup;
  977. if (parser().attribute_present("orientation"))
  978. {
  979. setup.orientation_.set(parser().attribute<orientation>("orientation"));
  980. }
  981. if (parser().attribute_present("horizontalDpi"))
  982. {
  983. setup.horizontal_dpi_.set(parser().attribute<std::size_t>("horizontalDpi"));
  984. }
  985. if (parser().attribute_present("verticalDpi"))
  986. {
  987. setup.vertical_dpi_.set(parser().attribute<std::size_t>("verticalDpi"));
  988. }
  989. if (parser().attribute_present("paperSize"))
  990. {
  991. setup.paper_size(static_cast<xlnt::paper_size>(parser().attribute<std::size_t>("paperSize")));
  992. }
  993. if (parser().attribute_present("scale"))
  994. {
  995. setup.scale(parser().attribute<double>("scale"));
  996. }
  997. if (parser().attribute_present(qn("r", "id")))
  998. {
  999. setup.rel_id(parser().attribute(qn("r", "id")));
  1000. }
  1001. ws.page_setup(setup);
  1002. skip_remaining_content(current_worksheet_element);
  1003. }
  1004. else if (current_worksheet_element == qn("spreadsheetml", "headerFooter")) // CT_HeaderFooter 0-1
  1005. {
  1006. header_footer hf;
  1007. hf.align_with_margins(!parser().attribute_present("alignWithMargins")
  1008. || is_true(parser().attribute("alignWithMargins")));
  1009. hf.scale_with_doc(!parser().attribute_present("alignWithMargins")
  1010. || is_true(parser().attribute("alignWithMargins")));
  1011. auto different_odd_even = parser().attribute_present("differentOddEven")
  1012. && is_true(parser().attribute("differentOddEven"));
  1013. auto different_first = parser().attribute_present("differentFirst")
  1014. && is_true(parser().attribute("differentFirst"));
  1015. optional<std::array<optional<rich_text>, 3>> odd_header;
  1016. optional<std::array<optional<rich_text>, 3>> odd_footer;
  1017. optional<std::array<optional<rich_text>, 3>> even_header;
  1018. optional<std::array<optional<rich_text>, 3>> even_footer;
  1019. optional<std::array<optional<rich_text>, 3>> first_header;
  1020. optional<std::array<optional<rich_text>, 3>> first_footer;
  1021. using xlnt::detail::decode_header_footer;
  1022. while (in_element(current_worksheet_element))
  1023. {
  1024. auto current_hf_element = expect_start_element(xml::content::simple);
  1025. if (current_hf_element == qn("spreadsheetml", "oddHeader"))
  1026. {
  1027. odd_header = decode_header_footer(read_text(), converter_);
  1028. }
  1029. else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
  1030. {
  1031. odd_footer = decode_header_footer(read_text(), converter_);
  1032. }
  1033. else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
  1034. {
  1035. even_header = decode_header_footer(read_text(), converter_);
  1036. }
  1037. else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
  1038. {
  1039. even_footer = decode_header_footer(read_text(), converter_);
  1040. }
  1041. else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
  1042. {
  1043. first_header = decode_header_footer(read_text(), converter_);
  1044. }
  1045. else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
  1046. {
  1047. first_footer = decode_header_footer(read_text(), converter_);
  1048. }
  1049. else
  1050. {
  1051. unexpected_element(current_hf_element);
  1052. }
  1053. expect_end_element(current_hf_element);
  1054. }
  1055. for (std::size_t i = 0; i < 3; ++i)
  1056. {
  1057. auto loc = i == 0 ? header_footer::location::left
  1058. : i == 1 ? header_footer::location::center : header_footer::location::right;
  1059. if (different_odd_even)
  1060. {
  1061. if (odd_header.is_set()
  1062. && odd_header.get().at(i).is_set()
  1063. && even_header.is_set()
  1064. && even_header.get().at(i).is_set())
  1065. {
  1066. hf.odd_even_header(loc, odd_header.get().at(i).get(), even_header.get().at(i).get());
  1067. }
  1068. if (odd_footer.is_set()
  1069. && odd_footer.get().at(i).is_set()
  1070. && even_footer.is_set()
  1071. && even_footer.get().at(i).is_set())
  1072. {
  1073. hf.odd_even_footer(loc, odd_footer.get().at(i).get(), even_footer.get().at(i).get());
  1074. }
  1075. }
  1076. else
  1077. {
  1078. if (odd_header.is_set() && odd_header.get().at(i).is_set())
  1079. {
  1080. hf.header(loc, odd_header.get().at(i).get());
  1081. }
  1082. if (odd_footer.is_set() && odd_footer.get().at(i).is_set())
  1083. {
  1084. hf.footer(loc, odd_footer.get().at(i).get());
  1085. }
  1086. }
  1087. if (different_first)
  1088. {
  1089. }
  1090. }
  1091. ws.header_footer(hf);
  1092. }
  1093. else if (current_worksheet_element == qn("spreadsheetml", "rowBreaks")) // CT_PageBreak 0-1
  1094. {
  1095. auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
  1096. auto manual_break_count = parser().attribute_present("manualBreakCount")
  1097. ? parser().attribute<std::size_t>("manualBreakCount")
  1098. : 0;
  1099. while (in_element(qn("spreadsheetml", "rowBreaks")))
  1100. {
  1101. expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
  1102. if (parser().attribute_present("id"))
  1103. {
  1104. ws.page_break_at_row(parser().attribute<row_t>("id"));
  1105. --count;
  1106. }
  1107. if (parser().attribute_present("man") && is_true(parser().attribute("man")))
  1108. {
  1109. --manual_break_count;
  1110. }
  1111. skip_attributes({"min", "max", "pt"});
  1112. expect_end_element(qn("spreadsheetml", "brk"));
  1113. }
  1114. }
  1115. else if (current_worksheet_element == qn("spreadsheetml", "colBreaks")) // CT_PageBreak 0-1
  1116. {
  1117. auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
  1118. auto manual_break_count = parser().attribute_present("manualBreakCount")
  1119. ? parser().attribute<std::size_t>("manualBreakCount")
  1120. : 0;
  1121. while (in_element(qn("spreadsheetml", "colBreaks")))
  1122. {
  1123. expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
  1124. if (parser().attribute_present("id"))
  1125. {
  1126. ws.page_break_at_column(parser().attribute<column_t::index_t>("id"));
  1127. --count;
  1128. }
  1129. if (parser().attribute_present("man") && is_true(parser().attribute("man")))
  1130. {
  1131. --manual_break_count;
  1132. }
  1133. skip_attributes({"min", "max", "pt"});
  1134. expect_end_element(qn("spreadsheetml", "brk"));
  1135. }
  1136. }
  1137. else if (current_worksheet_element == qn("spreadsheetml", "customProperties")) // CT_CustomProperties 0-1
  1138. {
  1139. skip_remaining_content(current_worksheet_element);
  1140. }
  1141. else if (current_worksheet_element == qn("spreadsheetml", "cellWatches")) // CT_CellWatches 0-1
  1142. {
  1143. skip_remaining_content(current_worksheet_element);
  1144. }
  1145. else if (current_worksheet_element == qn("spreadsheetml", "ignoredErrors")) // CT_IgnoredErrors 0-1
  1146. {
  1147. skip_remaining_content(current_worksheet_element);
  1148. }
  1149. else if (current_worksheet_element == qn("spreadsheetml", "smartTags")) // CT_SmartTags 0-1
  1150. {
  1151. skip_remaining_content(current_worksheet_element);
  1152. }
  1153. else if (current_worksheet_element == qn("spreadsheetml", "drawing")) // CT_Drawing 0-1
  1154. {
  1155. if (parser().attribute_present(qn("r", "id")))
  1156. {
  1157. auto drawing_rel_id = parser().attribute(qn("r", "id"));
  1158. ws.d_->drawing_rel_id_ = drawing_rel_id;
  1159. }
  1160. }
  1161. else if (current_worksheet_element == qn("spreadsheetml", "legacyDrawing"))
  1162. {
  1163. skip_remaining_content(current_worksheet_element);
  1164. }
  1165. else if (current_worksheet_element == qn("spreadsheetml", "extLst"))
  1166. {
  1167. ext_list extensions(parser(), current_worksheet_element.namespace_());
  1168. ws.d_->extension_list_.set(extensions);
  1169. }
  1170. else
  1171. {
  1172. unexpected_element(current_worksheet_element);
  1173. }
  1174. expect_end_element(current_worksheet_element);
  1175. }
  1176. expect_end_element(qn("spreadsheetml", "worksheet"));
  1177. if (manifest.has_relationship(sheet_path, xlnt::relationship_type::comments))
  1178. {
  1179. auto comments_part = manifest.canonicalize({workbook_rel, sheet_rel,
  1180. manifest.relationship(sheet_path, xlnt::relationship_type::comments)});
  1181. auto receive = xml::parser::receive_default;
  1182. auto comments_part_streambuf = archive_->open(comments_part);
  1183. std::istream comments_part_stream(comments_part_streambuf.get());
  1184. xml::parser parser(comments_part_stream, comments_part.string(), receive);
  1185. parser_ = &parser;
  1186. read_comments(ws);
  1187. if (manifest.has_relationship(sheet_path, xlnt::relationship_type::vml_drawing))
  1188. {
  1189. auto vml_drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
  1190. manifest.relationship(sheet_path, xlnt::relationship_type::vml_drawing)});
  1191. auto vml_drawings_part_streambuf = archive_->open(comments_part);
  1192. std::istream vml_drawings_part_stream(comments_part_streambuf.get());
  1193. xml::parser vml_parser(vml_drawings_part_stream, vml_drawings_part.string(), receive);
  1194. parser_ = &vml_parser;
  1195. read_vml_drawings(ws);
  1196. }
  1197. }
  1198. if (manifest.has_relationship(sheet_path, xlnt::relationship_type::drawings))
  1199. {
  1200. auto drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
  1201. manifest.relationship(sheet_path, xlnt::relationship_type::drawings)});
  1202. auto receive = xml::parser::receive_default;
  1203. auto drawings_part_streambuf = archive_->open(drawings_part);
  1204. std::istream drawings_part_stream(drawings_part_streambuf.get());
  1205. xml::parser parser(drawings_part_stream, drawings_part.string(), receive);
  1206. parser_ = &parser;
  1207. read_drawings(ws, drawings_part);
  1208. }
  1209. if (manifest.has_relationship(sheet_path, xlnt::relationship_type::printer_settings))
  1210. {
  1211. read_part({workbook_rel, sheet_rel,
  1212. manifest.relationship(sheet_path,
  1213. relationship_type::printer_settings)});
  1214. }
  1215. for (auto array_formula : array_formulae_)
  1216. {
  1217. for (auto row : ws.range(array_formula.first))
  1218. {
  1219. for (auto cell : row)
  1220. {
  1221. cell.formula(array_formula.second);
  1222. }
  1223. }
  1224. }
  1225. return ws;
  1226. }
  1227. xml::parser &xlsx_consumer::parser()
  1228. {
  1229. return *parser_;
  1230. }
  1231. bool xlsx_consumer::has_cell()
  1232. {
  1233. auto ws = worksheet(current_worksheet_);
  1234. while (streaming_cell_ // we're not at the end of the file
  1235. && !in_element(qn("spreadsheetml", "row"))) // we're at the end of a row, or between rows
  1236. {
  1237. if (parser().peek() == xml::parser::event_type::end_element
  1238. && stack_.back() == qn("spreadsheetml", "row"))
  1239. {
  1240. // We're at the end of a row.
  1241. expect_end_element(qn("spreadsheetml", "row"));
  1242. // ... and keep parsing.
  1243. }
  1244. if (parser().peek() == xml::parser::event_type::end_element
  1245. && stack_.back() == qn("spreadsheetml", "sheetData"))
  1246. {
  1247. // End of sheet. Mark it by setting streaming_cell_ to nullptr, so we never get here again.
  1248. expect_end_element(qn("spreadsheetml", "sheetData"));
  1249. streaming_cell_.reset(nullptr);
  1250. break;
  1251. }
  1252. expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
  1253. auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
  1254. auto &row_properties = ws.row_properties(row_index);
  1255. if (parser().attribute_present("ht"))
  1256. {
  1257. row_properties.height = converter_.deserialise(parser().attribute("ht"));
  1258. }
  1259. if (parser().attribute_present("customHeight"))
  1260. {
  1261. row_properties.custom_height = is_true(parser().attribute("customHeight"));
  1262. }
  1263. if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
  1264. {
  1265. row_properties.hidden = true;
  1266. }
  1267. if (parser().attribute_present(qn("x14ac", "dyDescent")))
  1268. {
  1269. row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
  1270. }
  1271. if (parser().attribute_present("spans"))
  1272. {
  1273. row_properties.spans = parser().attribute("spans");
  1274. }
  1275. skip_attributes({"customFormat", "s", "customFont",
  1276. "outlineLevel", "collapsed", "thickTop", "thickBot",
  1277. "ph"});
  1278. }
  1279. if (!streaming_cell_)
  1280. {
  1281. // We're at the end of the worksheet
  1282. return false;
  1283. }
  1284. expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
  1285. assert(streaming_);
  1286. streaming_cell_.reset(new detail::cell_impl()); // Clean cell state - otherwise it might contain information from the previously streamed cell.
  1287. auto cell = xlnt::cell(streaming_cell_.get());
  1288. auto reference = cell_reference(parser().attribute("r"));
  1289. cell.d_->parent_ = current_worksheet_;
  1290. cell.d_->column_ = reference.column_index();
  1291. cell.d_->row_ = reference.row();
  1292. if (parser().attribute_present("ph"))
  1293. {
  1294. cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
  1295. }
  1296. auto has_type = parser().attribute_present("t");
  1297. auto type = has_type ? parser().attribute("t") : "n";
  1298. if (parser().attribute_present("s"))
  1299. {
  1300. cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
  1301. }
  1302. auto has_value = false;
  1303. auto value_string = std::string();
  1304. auto formula_string = std::string();
  1305. while (in_element(qn("spreadsheetml", "c")))
  1306. {
  1307. auto current_element = expect_start_element(xml::content::mixed);
  1308. if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
  1309. {
  1310. has_value = true;
  1311. value_string = read_text();
  1312. }
  1313. else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
  1314. {
  1315. auto has_shared_formula = false;
  1316. auto has_array_formula = false;
  1317. auto is_master_cell = false;
  1318. auto shared_formula_index = 0;
  1319. auto formula_range = range_reference();
  1320. if (parser().attribute_present("t"))
  1321. {
  1322. auto formula_type = parser().attribute("t");
  1323. if (formula_type == "shared")
  1324. {
  1325. has_shared_formula = true;
  1326. shared_formula_index = parser().attribute<int>("si");
  1327. if (parser().attribute_present("ref"))
  1328. {
  1329. is_master_cell = true;
  1330. }
  1331. }
  1332. else if (formula_type == "array")
  1333. {
  1334. has_array_formula = true;
  1335. formula_range = range_reference(parser().attribute("ref"));
  1336. is_master_cell = true;
  1337. }
  1338. }
  1339. skip_attributes({"aca", "dt2D", "dtr", "del1", "del2", "r1",
  1340. "r2", "ca", "bx"});
  1341. formula_string = read_text();
  1342. if (is_master_cell)
  1343. {
  1344. if (has_shared_formula)
  1345. {
  1346. shared_formulae_[shared_formula_index] = formula_string;
  1347. }
  1348. else if (has_array_formula)
  1349. {
  1350. array_formulae_[formula_range.to_string()] = formula_string;
  1351. }
  1352. }
  1353. else if (has_shared_formula)
  1354. {
  1355. auto shared_formula = shared_formulae_.find(shared_formula_index);
  1356. if (shared_formula != shared_formulae_.end())
  1357. {
  1358. formula_string = shared_formula->second;
  1359. }
  1360. }
  1361. }
  1362. else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
  1363. {
  1364. expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
  1365. has_value = true;
  1366. value_string = read_text();
  1367. expect_end_element(qn("spreadsheetml", "t"));
  1368. }
  1369. else
  1370. {
  1371. unexpected_element(current_element);
  1372. }
  1373. expect_end_element(current_element);
  1374. }
  1375. expect_end_element(qn("spreadsheetml", "c"));
  1376. if (!formula_string.empty())
  1377. {
  1378. cell.formula(formula_string);
  1379. }
  1380. if (has_value)
  1381. {
  1382. if (type == "str")
  1383. {
  1384. cell.d_->value_text_ = value_string;
  1385. cell.data_type(cell::type::formula_string);
  1386. }
  1387. else if (type == "inlineStr")
  1388. {
  1389. cell.d_->value_text_ = value_string;
  1390. cell.data_type(cell::type::inline_string);
  1391. }
  1392. else if (type == "s")
  1393. {
  1394. cell.d_->value_numeric_ = converter_.deserialise(value_string);
  1395. cell.data_type(cell::type::shared_string);
  1396. }
  1397. else if (type == "b") // boolean
  1398. {
  1399. cell.value(is_true(value_string));
  1400. }
  1401. else if (type == "n") // numeric
  1402. {
  1403. cell.value(converter_.deserialise(value_string));
  1404. }
  1405. else if (!value_string.empty() && value_string[0] == '#')
  1406. {
  1407. cell.error(value_string);
  1408. }
  1409. }
  1410. return true;
  1411. }
  1412. std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
  1413. {
  1414. const auto part_rels_path = part.parent().append("_rels").append(part.filename() + ".rels").relative_to(path("/"));
  1415. std::vector<xlnt::relationship> relationships;
  1416. if (!archive_->has_file(part_rels_path)) return relationships;
  1417. auto rels_streambuf = archive_->open(part_rels_path);
  1418. std::istream rels_stream(rels_streambuf.get());
  1419. xml::parser parser(rels_stream, part_rels_path.string());
  1420. parser_ = &parser;
  1421. expect_start_element(qn("relationships", "Relationships"), xml::content::complex);
  1422. while (in_element(qn("relationships", "Relationships")))
  1423. {
  1424. expect_start_element(qn("relationships", "Relationship"), xml::content::simple);
  1425. const auto target_mode = parser.attribute_present("TargetMode")
  1426. ? parser.attribute<xlnt::target_mode>("TargetMode")
  1427. : xlnt::target_mode::internal;
  1428. auto target = xlnt::uri(parser.attribute("Target"));
  1429. if (target.path().is_absolute() && target_mode == xlnt::target_mode::internal)
  1430. {
  1431. target = uri(target.path().relative_to(path(part.string()).resolve(path("/"))).string());
  1432. }
  1433. relationships.emplace_back(parser.attribute("Id"),
  1434. parser.attribute<xlnt::relationship_type>("Type"),
  1435. xlnt::uri(part.string()), target, target_mode);
  1436. expect_end_element(qn("relationships", "Relationship"));
  1437. }
  1438. expect_end_element(qn("relationships", "Relationships"));
  1439. parser_ = nullptr;
  1440. return relationships;
  1441. }
  1442. void xlsx_consumer::read_part(const std::vector<relationship> &rel_chain)
  1443. {
  1444. const auto &manifest = target_.manifest();
  1445. const auto part_path = manifest.canonicalize(rel_chain);
  1446. auto part_streambuf = archive_->open(part_path);
  1447. std::istream part_stream(part_streambuf.get());
  1448. xml::parser parser(part_stream, part_path.string());
  1449. parser_ = &parser;
  1450. switch (rel_chain.back().type())
  1451. {
  1452. case relationship_type::core_properties:
  1453. read_core_properties();
  1454. break;
  1455. case relationship_type::extended_properties:
  1456. read_extended_properties();
  1457. break;
  1458. case relationship_type::custom_properties:
  1459. read_custom_properties();
  1460. break;
  1461. case relationship_type::office_document:
  1462. read_office_document(manifest.content_type(part_path));
  1463. break;
  1464. case relationship_type::connections:
  1465. read_connections();
  1466. break;
  1467. case relationship_type::custom_xml_mappings:
  1468. read_custom_xml_mappings();
  1469. break;
  1470. case relationship_type::external_workbook_references:
  1471. read_external_workbook_references();
  1472. break;
  1473. case relationship_type::pivot_table:
  1474. read_pivot_table();
  1475. break;
  1476. case relationship_type::shared_workbook_revision_headers:
  1477. read_shared_workbook_revision_headers();
  1478. break;
  1479. case relationship_type::volatile_dependencies:
  1480. read_volatile_dependencies();
  1481. break;
  1482. case relationship_type::shared_string_table:
  1483. read_shared_string_table();
  1484. break;
  1485. case relationship_type::stylesheet:
  1486. read_stylesheet();
  1487. break;
  1488. case relationship_type::theme:
  1489. read_theme();
  1490. break;
  1491. case relationship_type::chartsheet:
  1492. read_chartsheet(rel_chain.back().id());
  1493. break;
  1494. case relationship_type::dialogsheet:
  1495. read_dialogsheet(rel_chain.back().id());
  1496. break;
  1497. case relationship_type::worksheet:
  1498. read_worksheet(rel_chain.back().id());
  1499. break;
  1500. case relationship_type::thumbnail:
  1501. read_image(part_path);
  1502. break;
  1503. case relationship_type::calculation_chain:
  1504. read_calculation_chain();
  1505. break;
  1506. case relationship_type::hyperlink:
  1507. break;
  1508. case relationship_type::comments:
  1509. break;
  1510. case relationship_type::vml_drawing:
  1511. break;
  1512. case relationship_type::unknown:
  1513. break;
  1514. case relationship_type::printer_settings:
  1515. read_binary(part_path);
  1516. break;
  1517. case relationship_type::custom_property:
  1518. break;
  1519. case relationship_type::drawings:
  1520. break;
  1521. case relationship_type::pivot_table_cache_definition:
  1522. break;
  1523. case relationship_type::pivot_table_cache_records:
  1524. break;
  1525. case relationship_type::query_table:
  1526. break;
  1527. case relationship_type::shared_workbook:
  1528. break;
  1529. case relationship_type::revision_log:
  1530. break;
  1531. case relationship_type::shared_workbook_user_data:
  1532. break;
  1533. case relationship_type::single_cell_table_definitions:
  1534. break;
  1535. case relationship_type::table_definition:
  1536. break;
  1537. case relationship_type::vbaproject:
  1538. read_binary(part_path);
  1539. break;
  1540. case relationship_type::image:
  1541. read_image(part_path);
  1542. break;
  1543. }
  1544. parser_ = nullptr;
  1545. }
  1546. void xlsx_consumer::populate_workbook(bool streaming)
  1547. {
  1548. streaming_ = streaming;
  1549. target_.clear();
  1550. read_content_types();
  1551. const auto root_path = path("/");
  1552. for (const auto &package_rel : read_relationships(root_path))
  1553. {
  1554. manifest().register_relationship(package_rel);
  1555. }
  1556. for (auto package_rel : manifest().relationships(root_path))
  1557. {
  1558. if (package_rel.type() == relationship_type::office_document)
  1559. {
  1560. // Read the workbook after all the other package parts
  1561. continue;
  1562. }
  1563. read_part({package_rel});
  1564. }
  1565. for (const auto &relationship_source_string : archive_->files())
  1566. {
  1567. for (const auto &part_rel : read_relationships(path(relationship_source_string)))
  1568. {
  1569. manifest().register_relationship(part_rel);
  1570. }
  1571. }
  1572. read_part({manifest().relationship(root_path,
  1573. relationship_type::office_document)});
  1574. }
  1575. // Package Parts
  1576. void xlsx_consumer::read_content_types()
  1577. {
  1578. auto &manifest = target_.manifest();
  1579. auto content_types_streambuf = archive_->open(path("[Content_Types].xml"));
  1580. std::istream content_types_stream(content_types_streambuf.get());
  1581. xml::parser parser(content_types_stream, "[Content_Types].xml");
  1582. parser_ = &parser;
  1583. expect_start_element(qn("content-types", "Types"), xml::content::complex);
  1584. while (in_element(qn("content-types", "Types")))
  1585. {
  1586. auto current_element = expect_start_element(xml::content::complex);
  1587. if (current_element == qn("content-types", "Default"))
  1588. {
  1589. auto extension = parser.attribute("Extension");
  1590. auto content_type = parser.attribute("ContentType");
  1591. manifest.register_default_type(extension, content_type);
  1592. }
  1593. else if (current_element == qn("content-types", "Override"))
  1594. {
  1595. auto part_name = parser.attribute("PartName");
  1596. auto content_type = parser.attribute("ContentType");
  1597. manifest.register_override_type(path(part_name), content_type);
  1598. }
  1599. else
  1600. {
  1601. unexpected_element(current_element);
  1602. }
  1603. expect_end_element(current_element);
  1604. }
  1605. expect_end_element(qn("content-types", "Types"));
  1606. }
  1607. void xlsx_consumer::read_core_properties()
  1608. {
  1609. //qn("extended-properties", "Properties");
  1610. //qn("custom-properties", "Properties");
  1611. expect_start_element(qn("core-properties", "coreProperties"), xml::content::complex);
  1612. while (in_element(qn("core-properties", "coreProperties")))
  1613. {
  1614. const auto property_element = expect_start_element(xml::content::simple);
  1615. const auto prop = detail::from_string<core_property>(property_element.name());
  1616. if (prop == core_property::created || prop == core_property::modified)
  1617. {
  1618. skip_attribute(qn("xsi", "type"));
  1619. }
  1620. target_.core_property(prop, read_text());
  1621. expect_end_element(property_element);
  1622. }
  1623. expect_end_element(qn("core-properties", "coreProperties"));
  1624. }
  1625. void xlsx_consumer::read_extended_properties()
  1626. {
  1627. expect_start_element(qn("extended-properties", "Properties"), xml::content::complex);
  1628. while (in_element(qn("extended-properties", "Properties")))
  1629. {
  1630. const auto property_element = expect_start_element(xml::content::mixed);
  1631. const auto prop = detail::from_string<extended_property>(property_element.name());
  1632. target_.extended_property(prop, read_variant());
  1633. expect_end_element(property_element);
  1634. }
  1635. expect_end_element(qn("extended-properties", "Properties"));
  1636. }
  1637. void xlsx_consumer::read_custom_properties()
  1638. {
  1639. expect_start_element(qn("custom-properties", "Properties"), xml::content::complex);
  1640. while (in_element(qn("custom-properties", "Properties")))
  1641. {
  1642. const auto property_element = expect_start_element(xml::content::complex);
  1643. const auto prop = parser().attribute("name");
  1644. const auto format_id = parser().attribute("fmtid");
  1645. const auto property_id = parser().attribute("pid");
  1646. target_.custom_property(prop, read_variant());
  1647. expect_end_element(property_element);
  1648. }
  1649. expect_end_element(qn("custom-properties", "Properties"));
  1650. }
  1651. void xlsx_consumer::read_office_document(const std::string &content_type) // CT_Workbook
  1652. {
  1653. if (content_type !=
  1654. "application/vnd."
  1655. "openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
  1656. && content_type !=
  1657. "application/vnd."
  1658. "openxmlformats-officedocument.spreadsheetml.template.main+xml"
  1659. && content_type !=
  1660. "application/vnd."
  1661. "ms-excel.sheet.macroEnabled.main+xml")
  1662. {
  1663. throw xlnt::invalid_file(content_type);
  1664. }
  1665. target_.d_->calculation_properties_.clear();
  1666. expect_start_element(qn("workbook", "workbook"), xml::content::complex);
  1667. skip_attribute(qn("mc", "Ignorable"));
  1668. while (in_element(qn("workbook", "workbook")))
  1669. {
  1670. auto current_workbook_element = expect_start_element(xml::content::complex);
  1671. if (current_workbook_element == qn("workbook", "fileVersion")) // CT_FileVersion 0-1
  1672. {
  1673. detail::workbook_impl::file_version_t file_version;
  1674. if (parser().attribute_present("appName"))
  1675. {
  1676. file_version.app_name = parser().attribute("appName");
  1677. }
  1678. if (parser().attribute_present("lastEdited"))
  1679. {
  1680. file_version.last_edited = parser().attribute<std::size_t>("lastEdited");
  1681. }
  1682. if (parser().attribute_present("lowestEdited"))
  1683. {
  1684. file_version.lowest_edited = parser().attribute<std::size_t>("lowestEdited");
  1685. }
  1686. if (parser().attribute_present("lowestEdited"))
  1687. {
  1688. file_version.rup_build = parser().attribute<std::size_t>("rupBuild");
  1689. }
  1690. skip_attribute("codeName");
  1691. target_.d_->file_version_ = file_version;
  1692. }
  1693. else if (current_workbook_element == qn("workbook", "fileSharing")) // CT_FileSharing 0-1
  1694. {
  1695. skip_remaining_content(current_workbook_element);
  1696. }
  1697. else if (current_workbook_element == qn("mc", "AlternateContent"))
  1698. {
  1699. while (in_element(qn("mc", "AlternateContent")))
  1700. {
  1701. auto alternate_content_element = expect_start_element(xml::content::complex);
  1702. if (alternate_content_element == qn("mc", "Choice")
  1703. && parser().attribute_present("Requires")
  1704. && parser().attribute("Requires") == "x15")
  1705. {
  1706. auto x15_element = expect_start_element(xml::content::simple);
  1707. if (x15_element == qn("x15ac", "absPath"))
  1708. {
  1709. target_.d_->abs_path_ = parser().attribute("url");
  1710. }
  1711. skip_remaining_content(x15_element);
  1712. expect_end_element(x15_element);
  1713. }
  1714. skip_remaining_content(alternate_content_element);
  1715. expect_end_element(alternate_content_element);
  1716. }
  1717. }
  1718. else if (current_workbook_element == qn("workbook", "workbookPr")) // CT_WorkbookPr 0-1
  1719. {
  1720. target_.base_date(parser().attribute_present("date1904") // optional, bool=false
  1721. && is_true(parser().attribute("date1904"))
  1722. ? calendar::mac_1904
  1723. : calendar::windows_1900);
  1724. skip_attribute("showObjects"); // optional, ST_Objects="all"
  1725. skip_attribute("showBorderUnselectedTables"); // optional, bool=true
  1726. skip_attribute("filterPrivacy"); // optional, bool=false
  1727. skip_attribute("promptedSolutions"); // optional, bool=false
  1728. skip_attribute("showInkAnnotation"); // optional, bool=true
  1729. skip_attribute("backupFile"); // optional, bool=false
  1730. skip_attribute("saveExternalLinkValues"); // optional, bool=true
  1731. skip_attribute("updateLinks"); // optional, ST_UpdateLinks="userSet"
  1732. skip_attribute("codeName"); // optional, string
  1733. skip_attribute("hidePivotFieldList"); // optional, bool=false
  1734. skip_attribute("showPivotChartFilter"); // optional, bool=false
  1735. skip_attribute("allowRefreshQuery"); // optional, bool=false
  1736. skip_attribute("publishItems"); // optional, bool=false
  1737. skip_attribute("checkCompatibility"); // optional, bool=false
  1738. skip_attribute("autoCompressPictures"); // optional, bool=true
  1739. skip_attribute("refreshAllConnections"); // optional, bool=false
  1740. skip_attribute("defaultThemeVersion"); // optional, uint
  1741. skip_attribute("dateCompatibility"); // optional, bool (undocumented)
  1742. }
  1743. else if (current_workbook_element == qn("workbook", "workbookProtection")) // CT_WorkbookProtection 0-1
  1744. {
  1745. skip_remaining_content(current_workbook_element);
  1746. }
  1747. else if (current_workbook_element == qn("workbook", "bookViews")) // CT_BookViews 0-1
  1748. {
  1749. while (in_element(qn("workbook", "bookViews")))
  1750. {
  1751. expect_start_element(qn("workbook", "workbookView"), xml::content::simple);
  1752. skip_attributes({"firstSheet", "showHorizontalScroll",
  1753. "showSheetTabs", "showVerticalScroll"});
  1754. workbook_view view;
  1755. if (parser().attribute_present("xWindow"))
  1756. {
  1757. view.x_window = parser().attribute<int>("xWindow");
  1758. }
  1759. if (parser().attribute_present("yWindow"))
  1760. {
  1761. view.y_window = parser().attribute<int>("yWindow");
  1762. }
  1763. if (parser().attribute_present("windowWidth"))
  1764. {
  1765. view.window_width = parser().attribute<std::size_t>("windowWidth");
  1766. }
  1767. if (parser().attribute_present("windowHeight"))
  1768. {
  1769. view.window_height = parser().attribute<std::size_t>("windowHeight");
  1770. }
  1771. if (parser().attribute_present("tabRatio"))
  1772. {
  1773. view.tab_ratio = parser().attribute<std::size_t>("tabRatio");
  1774. }
  1775. if (parser().attribute_present("activeTab"))
  1776. {
  1777. view.active_tab = parser().attribute<std::size_t>("activeTab");
  1778. target_.d_->active_sheet_index_.set(view.active_tab.get());
  1779. }
  1780. target_.view(view);
  1781. skip_attributes();
  1782. expect_end_element(qn("workbook", "workbookView"));
  1783. }
  1784. }
  1785. else if (current_workbook_element == qn("workbook", "sheets")) // CT_Sheets 1
  1786. {
  1787. std::size_t index = 0;
  1788. while (in_element(qn("workbook", "sheets")))
  1789. {
  1790. expect_start_element(qn("spreadsheetml", "sheet"), xml::content::simple);
  1791. auto title = parser().attribute("name");
  1792. sheet_title_index_map_[title] = index++;
  1793. sheet_title_id_map_[title] = parser().attribute<std::size_t>("sheetId");
  1794. target_.d_->sheet_title_rel_id_map_[title] = parser().attribute(qn("r", "id"));
  1795. bool hidden = parser().attribute<std::string>("state", "") == "hidden";
  1796. target_.d_->sheet_hidden_.push_back(hidden);
  1797. expect_end_element(qn("spreadsheetml", "sheet"));
  1798. }
  1799. }
  1800. else if (current_workbook_element == qn("workbook", "functionGroups")) // CT_FunctionGroups 0-1
  1801. {
  1802. skip_remaining_content(current_workbook_element);
  1803. }
  1804. else if (current_workbook_element == qn("workbook", "externalReferences")) // CT_ExternalReferences 0-1
  1805. {
  1806. skip_remaining_content(current_workbook_element);
  1807. }
  1808. else if (current_workbook_element == qn("workbook", "definedNames")) // CT_DefinedNames 0-1
  1809. {
  1810. while (in_element(qn("workbook", "definedNames")))
  1811. {
  1812. expect_start_element(qn("spreadsheetml", "definedName"), xml::content::mixed);
  1813. defined_name name;
  1814. name.name = parser().attribute("name");
  1815. name.sheet_id = parser().attribute<std::size_t>("localSheetId");
  1816. name.hidden = false;
  1817. if (parser().attribute_present("hidden"))
  1818. {
  1819. name.hidden = is_true(parser().attribute("hidden"));
  1820. }
  1821. parser().attribute_map(); // skip remaining attributes
  1822. name.value = read_text();
  1823. defined_names_.push_back(name);
  1824. expect_end_element(qn("spreadsheetml", "definedName"));
  1825. }
  1826. }
  1827. else if (current_workbook_element == qn("workbook", "calcPr")) // CT_CalcPr 0-1
  1828. {
  1829. xlnt::calculation_properties calc_props;
  1830. if (parser().attribute_present("calcId"))
  1831. {
  1832. calc_props.calc_id = parser().attribute<std::size_t>("calcId");
  1833. }
  1834. if (parser().attribute_present("concurrentCalc"))
  1835. {
  1836. calc_props.concurrent_calc = is_true(parser().attribute("concurrentCalc"));
  1837. }
  1838. target_.calculation_properties(calc_props);
  1839. parser().attribute_map(); // skip remaining
  1840. }
  1841. else if (current_workbook_element == qn("workbook", "oleSize")) // CT_OleSize 0-1
  1842. {
  1843. skip_remaining_content(current_workbook_element);
  1844. }
  1845. else if (current_workbook_element == qn("workbook", "customWorkbookViews")) // CT_CustomWorkbookViews 0-1
  1846. {
  1847. skip_remaining_content(current_workbook_element);
  1848. }
  1849. else if (current_workbook_element == qn("workbook", "pivotCaches")) // CT_PivotCaches 0-1
  1850. {
  1851. skip_remaining_content(current_workbook_element);
  1852. }
  1853. else if (current_workbook_element == qn("workbook", "smartTagPr")) // CT_SmartTagPr 0-1
  1854. {
  1855. skip_remaining_content(current_workbook_element);
  1856. }
  1857. else if (current_workbook_element == qn("workbook", "smartTagTypes")) // CT_SmartTagTypes 0-1
  1858. {
  1859. skip_remaining_content(current_workbook_element);
  1860. }
  1861. else if (current_workbook_element == qn("workbook", "webPublishing")) // CT_WebPublishing 0-1
  1862. {
  1863. skip_remaining_content(current_workbook_element);
  1864. }
  1865. else if (current_workbook_element == qn("workbook", "fileRecoveryPr")) // CT_FileRecoveryPr 0+
  1866. {
  1867. skip_remaining_content(current_workbook_element);
  1868. }
  1869. else if (current_workbook_element == qn("workbook", "webPublishObjects")) // CT_WebPublishObjects 0-1
  1870. {
  1871. skip_remaining_content(current_workbook_element);
  1872. }
  1873. else if (current_workbook_element == qn("workbook", "extLst")) // CT_ExtensionList 0-1
  1874. {
  1875. while (in_element(qn("workbook", "extLst")))
  1876. {
  1877. auto extension_element = expect_start_element(xml::content::complex);
  1878. if (extension_element == qn("workbook", "ext")
  1879. && parser().attribute_present("uri")
  1880. && parser().attribute("uri") == "{7523E5D3-25F3-A5E0-1632-64F254C22452}")
  1881. {
  1882. auto arch_id_extension_element = expect_start_element(xml::content::simple);
  1883. if (arch_id_extension_element == qn("mx", "ArchID"))
  1884. {
  1885. target_.d_->arch_id_flags_ = parser().attribute<std::size_t>("Flags");
  1886. }
  1887. skip_remaining_content(arch_id_extension_element);
  1888. expect_end_element(arch_id_extension_element);
  1889. }
  1890. skip_remaining_content(extension_element);
  1891. expect_end_element(extension_element);
  1892. }
  1893. }
  1894. else
  1895. {
  1896. unexpected_element(current_workbook_element);
  1897. }
  1898. expect_end_element(current_workbook_element);
  1899. }
  1900. expect_end_element(qn("workbook", "workbook"));
  1901. auto workbook_rel = manifest().relationship(path("/"), relationship_type::office_document);
  1902. auto workbook_path = workbook_rel.target().path();
  1903. const auto rel_types = {
  1904. relationship_type::shared_string_table,
  1905. relationship_type::stylesheet,
  1906. relationship_type::theme,
  1907. relationship_type::vbaproject,
  1908. };
  1909. for (auto rel_type : rel_types)
  1910. {
  1911. if (manifest().has_relationship(workbook_path, rel_type))
  1912. {
  1913. read_part({workbook_rel,
  1914. manifest().relationship(workbook_path, rel_type)});
  1915. }
  1916. }
  1917. for (auto worksheet_rel : manifest().relationships(workbook_path, relationship_type::worksheet))
  1918. {
  1919. auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
  1920. target_.d_->sheet_title_rel_id_map_.end(),
  1921. [&](const std::pair<std::string, std::string> &p) {
  1922. return p.second == worksheet_rel.id();
  1923. })->first;
  1924. auto id = sheet_title_id_map_[title];
  1925. auto index = sheet_title_index_map_[title];
  1926. auto insertion_iter = target_.d_->worksheets_.begin();
  1927. while (insertion_iter != target_.d_->worksheets_.end()
  1928. && sheet_title_index_map_[insertion_iter->title_] < index)
  1929. {
  1930. ++insertion_iter;
  1931. }
  1932. current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
  1933. if (!streaming_)
  1934. {
  1935. read_part({workbook_rel, worksheet_rel});
  1936. }
  1937. }
  1938. }
// Read Workbook Relationship Target Parts
  1940. void xlsx_consumer::read_calculation_chain()
  1941. {
  1942. }
  1943. void xlsx_consumer::read_chartsheet(const std::string & /*title*/)
  1944. {
  1945. }
  1946. void xlsx_consumer::read_connections()
  1947. {
  1948. }
  1949. void xlsx_consumer::read_custom_property()
  1950. {
  1951. }
  1952. void xlsx_consumer::read_custom_xml_mappings()
  1953. {
  1954. }
  1955. void xlsx_consumer::read_dialogsheet(const std::string & /*title*/)
  1956. {
  1957. }
  1958. void xlsx_consumer::read_external_workbook_references()
  1959. {
  1960. }
  1961. void xlsx_consumer::read_pivot_table()
  1962. {
  1963. }
  1964. void xlsx_consumer::read_shared_string_table()
  1965. {
  1966. expect_start_element(qn("spreadsheetml", "sst"), xml::content::complex);
  1967. skip_attributes({"count"});
  1968. bool has_unique_count = false;
  1969. std::size_t unique_count = 0;
  1970. if (parser().attribute_present("uniqueCount"))
  1971. {
  1972. has_unique_count = true;
  1973. unique_count = parser().attribute<std::size_t>("uniqueCount");
  1974. }
  1975. while (in_element(qn("spreadsheetml", "sst")))
  1976. {
  1977. expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
  1978. auto rt = read_rich_text(qn("spreadsheetml", "si"));
  1979. target_.add_shared_string(rt, true);
  1980. expect_end_element(qn("spreadsheetml", "si"));
  1981. }
  1982. expect_end_element(qn("spreadsheetml", "sst"));
  1983. if (has_unique_count && unique_count != target_.shared_strings().size())
  1984. {
  1985. throw invalid_file("sizes don't match");
  1986. }
  1987. }
  1988. void xlsx_consumer::read_shared_workbook_revision_headers()
  1989. {
  1990. }
  1991. void xlsx_consumer::read_shared_workbook()
  1992. {
  1993. }
  1994. void xlsx_consumer::read_shared_workbook_user_data()
  1995. {
  1996. }
  1997. void xlsx_consumer::read_stylesheet()
  1998. {
  1999. target_.impl().stylesheet_ = detail::stylesheet();
  2000. auto &stylesheet = target_.impl().stylesheet_.get();
  2001. expect_start_element(qn("spreadsheetml", "styleSheet"), xml::content::complex);
  2002. skip_attributes({qn("mc", "Ignorable")});
  2003. std::vector<std::pair<style_impl, std::size_t>> styles;
  2004. std::vector<std::pair<format_impl, std::size_t>> format_records;
  2005. std::vector<std::pair<format_impl, std::size_t>> style_records;
  2006. while (in_element(qn("spreadsheetml", "styleSheet")))
  2007. {
  2008. auto current_style_element = expect_start_element(xml::content::complex);
  2009. if (current_style_element == qn("spreadsheetml", "borders"))
  2010. {
  2011. auto &borders = stylesheet.borders;
  2012. auto count = parser().attribute<std::size_t>("count");
  2013. while (in_element(qn("spreadsheetml", "borders")))
  2014. {
  2015. borders.push_back(xlnt::border());
  2016. auto &border = borders.back();
  2017. expect_start_element(qn("spreadsheetml", "border"), xml::content::complex);
  2018. auto diagonal = diagonal_direction::neither;
  2019. if (parser().attribute_present("diagonalDown") && parser().attribute("diagonalDown") == "1")
  2020. {
  2021. diagonal = diagonal_direction::down;
  2022. }
  2023. if (parser().attribute_present("diagonalUp") && parser().attribute("diagonalUp") == "1")
  2024. {
  2025. diagonal = diagonal == diagonal_direction::down ? diagonal_direction::both : diagonal_direction::up;
  2026. }
  2027. if (diagonal != diagonal_direction::neither)
  2028. {
  2029. border.diagonal(diagonal);
  2030. }
  2031. while (in_element(qn("spreadsheetml", "border")))
  2032. {
  2033. auto current_side_element = expect_start_element(xml::content::complex);
  2034. xlnt::border::border_property side;
  2035. if (parser().attribute_present("style"))
  2036. {
  2037. side.style(parser().attribute<xlnt::border_style>("style"));
  2038. }
  2039. if (in_element(current_side_element))
  2040. {
  2041. expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
  2042. side.color(read_color());
  2043. expect_end_element(qn("spreadsheetml", "color"));
  2044. }
  2045. expect_end_element(current_side_element);
  2046. auto side_type = xml::value_traits<xlnt::border_side>::parse(current_side_element.name(), parser());
  2047. border.side(side_type, side);
  2048. }
  2049. expect_end_element(qn("spreadsheetml", "border"));
  2050. }
  2051. if (count != borders.size())
  2052. {
  2053. throw xlnt::exception("border counts don't match");
  2054. }
  2055. }
  2056. else if (current_style_element == qn("spreadsheetml", "fills"))
  2057. {
  2058. auto &fills = stylesheet.fills;
  2059. auto count = parser().attribute<std::size_t>("count");
  2060. while (in_element(qn("spreadsheetml", "fills")))
  2061. {
  2062. fills.push_back(xlnt::fill());
  2063. auto &new_fill = fills.back();
  2064. expect_start_element(qn("spreadsheetml", "fill"), xml::content::complex);
  2065. auto fill_element = expect_start_element(xml::content::complex);
  2066. if (fill_element == qn("spreadsheetml", "patternFill"))
  2067. {
  2068. xlnt::pattern_fill pattern;
  2069. if (parser().attribute_present("patternType"))
  2070. {
  2071. pattern.type(parser().attribute<xlnt::pattern_fill_type>("patternType"));
  2072. while (in_element(qn("spreadsheetml", "patternFill")))
  2073. {
  2074. auto pattern_type_element = expect_start_element(xml::content::complex);
  2075. if (pattern_type_element == qn("spreadsheetml", "fgColor"))
  2076. {
  2077. pattern.foreground(read_color());
  2078. }
  2079. else if (pattern_type_element == qn("spreadsheetml", "bgColor"))
  2080. {
  2081. pattern.background(read_color());
  2082. }
  2083. else
  2084. {
  2085. unexpected_element(pattern_type_element);
  2086. }
  2087. expect_end_element(pattern_type_element);
  2088. }
  2089. }
  2090. new_fill = pattern;
  2091. }
  2092. else if (fill_element == qn("spreadsheetml", "gradientFill"))
  2093. {
  2094. xlnt::gradient_fill gradient;
  2095. if (parser().attribute_present("type"))
  2096. {
  2097. gradient.type(parser().attribute<xlnt::gradient_fill_type>("type"));
  2098. }
  2099. else
  2100. {
  2101. gradient.type(xlnt::gradient_fill_type::linear);
  2102. }
  2103. while (in_element(qn("spreadsheetml", "gradientFill")))
  2104. {
  2105. expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex);
  2106. auto position = converter_.deserialise(parser().attribute("position"));
  2107. expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
  2108. auto color = read_color();
  2109. expect_end_element(qn("spreadsheetml", "color"));
  2110. expect_end_element(qn("spreadsheetml", "stop"));
  2111. gradient.add_stop(position, color);
  2112. }
  2113. new_fill = gradient;
  2114. }
  2115. else
  2116. {
  2117. unexpected_element(fill_element);
  2118. }
  2119. expect_end_element(fill_element);
  2120. expect_end_element(qn("spreadsheetml", "fill"));
  2121. }
  2122. if (count != fills.size())
  2123. {
  2124. throw xlnt::exception("counts don't match");
  2125. }
  2126. }
  2127. else if (current_style_element == qn("spreadsheetml", "fonts"))
  2128. {
  2129. auto &fonts = stylesheet.fonts;
  2130. auto count = parser().attribute<std::size_t>("count", 0);
  2131. if (parser().attribute_present(qn("x14ac", "knownFonts")))
  2132. {
  2133. target_.enable_known_fonts();
  2134. }
  2135. while (in_element(qn("spreadsheetml", "fonts")))
  2136. {
  2137. fonts.push_back(xlnt::font());
  2138. auto &new_font = stylesheet.fonts.back();
  2139. expect_start_element(qn("spreadsheetml", "font"), xml::content::complex);
  2140. while (in_element(qn("spreadsheetml", "font")))
  2141. {
  2142. auto font_property_element = expect_start_element(xml::content::simple);
  2143. if (font_property_element == qn("spreadsheetml", "sz"))
  2144. {
  2145. new_font.size(converter_.deserialise(parser().attribute("val")));
  2146. }
  2147. else if (font_property_element == qn("spreadsheetml", "name"))
  2148. {
  2149. new_font.name(parser().attribute("val"));
  2150. }
  2151. else if (font_property_element == qn("spreadsheetml", "color"))
  2152. {
  2153. new_font.color(read_color());
  2154. }
  2155. else if (font_property_element == qn("spreadsheetml", "family"))
  2156. {
  2157. new_font.family(parser().attribute<std::size_t>("val"));
  2158. }
  2159. else if (font_property_element == qn("spreadsheetml", "scheme"))
  2160. {
  2161. new_font.scheme(parser().attribute("val"));
  2162. }
  2163. else if (font_property_element == qn("spreadsheetml", "b"))
  2164. {
  2165. if (parser().attribute_present("val"))
  2166. {
  2167. new_font.bold(is_true(parser().attribute("val")));
  2168. }
  2169. else
  2170. {
  2171. new_font.bold(true);
  2172. }
  2173. }
  2174. else if (font_property_element == qn("spreadsheetml", "vertAlign"))
  2175. {
  2176. auto vert_align = parser().attribute("val");
  2177. if (vert_align == "superscript")
  2178. {
  2179. new_font.superscript(true);
  2180. }
  2181. else if (vert_align == "subscript")
  2182. {
  2183. new_font.subscript(true);
  2184. }
  2185. }
  2186. else if (font_property_element == qn("spreadsheetml", "strike"))
  2187. {
  2188. if (parser().attribute_present("val"))
  2189. {
  2190. new_font.strikethrough(is_true(parser().attribute("val")));
  2191. }
  2192. else
  2193. {
  2194. new_font.strikethrough(true);
  2195. }
  2196. }
  2197. else if (font_property_element == qn("spreadsheetml", "outline"))
  2198. {
  2199. if (parser().attribute_present("val"))
  2200. {
  2201. new_font.outline(is_true(parser().attribute("val")));
  2202. }
  2203. else
  2204. {
  2205. new_font.outline(true);
  2206. }
  2207. }
  2208. else if (font_property_element == qn("spreadsheetml", "shadow"))
  2209. {
  2210. if (parser().attribute_present("val"))
  2211. {
  2212. new_font.shadow(is_true(parser().attribute("val")));
  2213. }
  2214. else
  2215. {
  2216. new_font.shadow(true);
  2217. }
  2218. }
  2219. else if (font_property_element == qn("spreadsheetml", "i"))
  2220. {
  2221. if (parser().attribute_present("val"))
  2222. {
  2223. new_font.italic(is_true(parser().attribute("val")));
  2224. }
  2225. else
  2226. {
  2227. new_font.italic(true);
  2228. }
  2229. }
  2230. else if (font_property_element == qn("spreadsheetml", "u"))
  2231. {
  2232. if (parser().attribute_present("val"))
  2233. {
  2234. new_font.underline(parser().attribute<xlnt::font::underline_style>("val"));
  2235. }
  2236. else
  2237. {
  2238. new_font.underline(xlnt::font::underline_style::single);
  2239. }
  2240. }
  2241. else if (font_property_element == qn("spreadsheetml", "charset"))
  2242. {
  2243. if (parser().attribute_present("val"))
  2244. {
  2245. parser().attribute("val");
  2246. }
  2247. }
  2248. else
  2249. {
  2250. unexpected_element(font_property_element);
  2251. }
  2252. expect_end_element(font_property_element);
  2253. }
  2254. expect_end_element(qn("spreadsheetml", "font"));
  2255. }
  2256. if (count != stylesheet.fonts.size())
  2257. {
  2258. // throw xlnt::exception("counts don't match");
  2259. }
  2260. }
  2261. else if (current_style_element == qn("spreadsheetml", "numFmts"))
  2262. {
  2263. auto &number_formats = stylesheet.number_formats;
  2264. auto count = parser().attribute<std::size_t>("count");
  2265. while (in_element(qn("spreadsheetml", "numFmts")))
  2266. {
  2267. expect_start_element(qn("spreadsheetml", "numFmt"), xml::content::simple);
  2268. auto format_string = parser().attribute("formatCode");
  2269. if (format_string == "GENERAL")
  2270. {
  2271. format_string = "General";
  2272. }
  2273. xlnt::number_format nf;
  2274. nf.format_string(format_string);
  2275. nf.id(parser().attribute<std::size_t>("numFmtId"));
  2276. expect_end_element(qn("spreadsheetml", "numFmt"));
  2277. number_formats.push_back(nf);
  2278. }
  2279. if (count != number_formats.size())
  2280. {
  2281. throw xlnt::exception("counts don't match");
  2282. }
  2283. }
  2284. else if (current_style_element == qn("spreadsheetml", "cellStyles"))
  2285. {
  2286. auto count = parser().attribute<std::size_t>("count");
  2287. while (in_element(qn("spreadsheetml", "cellStyles")))
  2288. {
  2289. auto &data = *styles.emplace(styles.end());
  2290. expect_start_element(qn("spreadsheetml", "cellStyle"), xml::content::simple);
  2291. data.first.name = parser().attribute("name");
  2292. data.second = parser().attribute<std::size_t>("xfId");
  2293. if (parser().attribute_present("builtinId"))
  2294. {
  2295. data.first.builtin_id = parser().attribute<std::size_t>("builtinId");
  2296. }
  2297. if (parser().attribute_present("hidden"))
  2298. {
  2299. data.first.hidden_style = is_true(parser().attribute("hidden"));
  2300. }
  2301. if (parser().attribute_present("customBuiltin"))
  2302. {
  2303. data.first.custom_builtin = is_true(parser().attribute("customBuiltin"));
  2304. }
  2305. expect_end_element(qn("spreadsheetml", "cellStyle"));
  2306. }
  2307. if (count != styles.size())
  2308. {
  2309. throw xlnt::exception("counts don't match");
  2310. }
  2311. }
  2312. else if (current_style_element == qn("spreadsheetml", "cellStyleXfs")
  2313. || current_style_element == qn("spreadsheetml", "cellXfs"))
  2314. {
  2315. auto in_style_records = current_style_element.name() == "cellStyleXfs";
  2316. auto count = parser().attribute<std::size_t>("count");
  2317. while (in_element(current_style_element))
  2318. {
  2319. expect_start_element(qn("spreadsheetml", "xf"), xml::content::complex);
  2320. auto &record = *(!in_style_records
  2321. ? format_records.emplace(format_records.end())
  2322. : style_records.emplace(style_records.end()));
  2323. if (parser().attribute_present("applyBorder"))
  2324. {
  2325. record.first.border_applied = is_true(parser().attribute("applyBorder"));
  2326. }
  2327. record.first.border_id = parser().attribute_present("borderId")
  2328. ? parser().attribute<std::size_t>("borderId")
  2329. : optional<std::size_t>();
  2330. if (parser().attribute_present("applyFill"))
  2331. {
  2332. record.first.fill_applied = is_true(parser().attribute("applyFill"));
  2333. }
  2334. record.first.fill_id = parser().attribute_present("fillId")
  2335. ? parser().attribute<std::size_t>("fillId")
  2336. : optional<std::size_t>();
  2337. if (parser().attribute_present("applyFont"))
  2338. {
  2339. record.first.font_applied = is_true(parser().attribute("applyFont"));
  2340. }
  2341. record.first.font_id = parser().attribute_present("fontId")
  2342. ? parser().attribute<std::size_t>("fontId")
  2343. : optional<std::size_t>();
  2344. if (parser().attribute_present("applyNumberFormat"))
  2345. {
  2346. record.first.number_format_applied = is_true(parser().attribute("applyNumberFormat"));
  2347. }
  2348. record.first.number_format_id = parser().attribute_present("numFmtId")
  2349. ? parser().attribute<std::size_t>("numFmtId")
  2350. : optional<std::size_t>();
  2351. auto apply_alignment_present = parser().attribute_present("applyAlignment");
  2352. if (apply_alignment_present)
  2353. {
  2354. record.first.alignment_applied = is_true(parser().attribute("applyAlignment"));
  2355. }
  2356. auto apply_protection_present = parser().attribute_present("applyProtection");
  2357. if (apply_protection_present)
  2358. {
  2359. record.first.protection_applied = is_true(parser().attribute("applyProtection"));
  2360. }
  2361. record.first.pivot_button_ = parser().attribute_present("pivotButton")
  2362. && is_true(parser().attribute("pivotButton"));
  2363. record.first.quote_prefix_ = parser().attribute_present("quotePrefix")
  2364. && is_true(parser().attribute("quotePrefix"));
  2365. if (parser().attribute_present("xfId"))
  2366. {
  2367. record.second = parser().attribute<std::size_t>("xfId");
  2368. }
  2369. while (in_element(qn("spreadsheetml", "xf")))
  2370. {
  2371. auto xf_child_element = expect_start_element(xml::content::simple);
  2372. if (xf_child_element == qn("spreadsheetml", "alignment"))
  2373. {
  2374. record.first.alignment_id = stylesheet.alignments.size();
  2375. auto &alignment = *stylesheet.alignments.emplace(stylesheet.alignments.end());
  2376. if (parser().attribute_present("wrapText"))
  2377. {
  2378. alignment.wrap(is_true(parser().attribute("wrapText")));
  2379. }
  2380. if (parser().attribute_present("shrinkToFit"))
  2381. {
  2382. alignment.shrink(is_true(parser().attribute("shrinkToFit")));
  2383. }
  2384. if (parser().attribute_present("indent"))
  2385. {
  2386. alignment.indent(parser().attribute<int>("indent"));
  2387. }
  2388. if (parser().attribute_present("textRotation"))
  2389. {
  2390. alignment.rotation(parser().attribute<int>("textRotation"));
  2391. }
  2392. if (parser().attribute_present("vertical"))
  2393. {
  2394. alignment.vertical(parser().attribute<xlnt::vertical_alignment>("vertical"));
  2395. }
  2396. if (parser().attribute_present("horizontal"))
  2397. {
  2398. alignment.horizontal(parser().attribute<xlnt::horizontal_alignment>("horizontal"));
  2399. }
  2400. if (parser().attribute_present("readingOrder"))
  2401. {
  2402. parser().attribute<int>("readingOrder");
  2403. }
  2404. }
  2405. else if (xf_child_element == qn("spreadsheetml", "protection"))
  2406. {
  2407. record.first.protection_id = stylesheet.protections.size();
  2408. auto &protection = *stylesheet.protections.emplace(stylesheet.protections.end());
  2409. protection.locked(parser().attribute_present("locked")
  2410. && is_true(parser().attribute("locked")));
  2411. protection.hidden(parser().attribute_present("hidden")
  2412. && is_true(parser().attribute("hidden")));
  2413. }
  2414. else
  2415. {
  2416. unexpected_element(xf_child_element);
  2417. }
  2418. expect_end_element(xf_child_element);
  2419. }
  2420. expect_end_element(qn("spreadsheetml", "xf"));
  2421. }
  2422. if ((in_style_records && count != style_records.size())
  2423. || (!in_style_records && count != format_records.size()))
  2424. {
  2425. throw xlnt::exception("counts don't match");
  2426. }
  2427. }
  2428. else if (current_style_element == qn("spreadsheetml", "dxfs"))
  2429. {
  2430. auto count = parser().attribute<std::size_t>("count");
  2431. std::size_t processed = 0;
  2432. while (in_element(current_style_element))
  2433. {
  2434. auto current_element = expect_start_element(xml::content::mixed);
  2435. skip_remaining_content(current_element);
  2436. expect_end_element(current_element);
  2437. ++processed;
  2438. }
  2439. if (count != processed)
  2440. {
  2441. throw xlnt::exception("counts don't match");
  2442. }
  2443. }
  2444. else if (current_style_element == qn("spreadsheetml", "tableStyles"))
  2445. {
  2446. skip_attribute("defaultTableStyle");
  2447. skip_attribute("defaultPivotStyle");
  2448. auto count = parser().attribute<std::size_t>("count");
  2449. std::size_t processed = 0;
  2450. while (in_element(qn("spreadsheetml", "tableStyles")))
  2451. {
  2452. auto current_element = expect_start_element(xml::content::complex);
  2453. skip_remaining_content(current_element);
  2454. expect_end_element(current_element);
  2455. ++processed;
  2456. }
  2457. if (count != processed)
  2458. {
  2459. throw xlnt::exception("counts don't match");
  2460. }
  2461. }
  2462. else if (current_style_element == qn("spreadsheetml", "extLst"))
  2463. {
  2464. while (in_element(qn("spreadsheetml", "extLst")))
  2465. {
  2466. expect_start_element(qn("spreadsheetml", "ext"), xml::content::complex);
  2467. const auto uri = parser().attribute("uri");
  2468. if (uri == "{EB79DEF2-80B8-43e5-95BD-54CBDDF9020C}") // slicerStyles
  2469. {
  2470. expect_start_element(qn("x14", "slicerStyles"), xml::content::simple);
  2471. stylesheet.default_slicer_style = parser().attribute("defaultSlicerStyle");
  2472. expect_end_element(qn("x14", "slicerStyles"));
  2473. }
  2474. else
  2475. {
  2476. skip_remaining_content(qn("spreadsheetml", "ext"));
  2477. }
  2478. expect_end_element(qn("spreadsheetml", "ext"));
  2479. }
  2480. }
  2481. else if (current_style_element == qn("spreadsheetml", "colors")) // CT_Colors 0-1
  2482. {
  2483. while (in_element(qn("spreadsheetml", "colors")))
  2484. {
  2485. auto colors_child_element = expect_start_element(xml::content::complex);
  2486. if (colors_child_element == qn("spreadsheetml", "indexedColors")) // CT_IndexedColors 0-1
  2487. {
  2488. while (in_element(colors_child_element))
  2489. {
  2490. expect_start_element(qn("spreadsheetml", "rgbColor"), xml::content::simple);
  2491. stylesheet.colors.push_back(read_color());
  2492. expect_end_element(qn("spreadsheetml", "rgbColor"));
  2493. }
  2494. }
  2495. else if (colors_child_element == qn("spreadsheetml", "mruColors")) // CT_MRUColors
  2496. {
  2497. skip_remaining_content(colors_child_element);
  2498. }
  2499. else
  2500. {
  2501. unexpected_element(colors_child_element);
  2502. }
  2503. expect_end_element(colors_child_element);
  2504. }
  2505. }
  2506. else
  2507. {
  2508. unexpected_element(current_style_element);
  2509. }
  2510. expect_end_element(current_style_element);
  2511. }
  2512. expect_end_element(qn("spreadsheetml", "styleSheet"));
  2513. std::size_t xf_id = 0;
  2514. for (const auto &record : style_records)
  2515. {
  2516. auto style_iter = std::find_if(styles.begin(), styles.end(),
  2517. [&xf_id](const std::pair<style_impl, std::size_t> &s) { return s.second == xf_id; });
  2518. ++xf_id;
  2519. if (style_iter == styles.end()) continue;
  2520. auto new_style = stylesheet.create_style(style_iter->first.name);
  2521. new_style.d_->pivot_button_ = style_iter->first.pivot_button_;
  2522. new_style.d_->quote_prefix_ = style_iter->first.quote_prefix_;
  2523. new_style.d_->formatting_record_id = style_iter->first.formatting_record_id;
  2524. new_style.d_->hidden_style = style_iter->first.hidden_style;
  2525. new_style.d_->custom_builtin = style_iter->first.custom_builtin;
  2526. new_style.d_->hidden_style = style_iter->first.hidden_style;
  2527. new_style.d_->builtin_id = style_iter->first.builtin_id;
  2528. new_style.d_->outline_style = style_iter->first.outline_style;
  2529. new_style.d_->alignment_applied = record.first.alignment_applied;
  2530. new_style.d_->alignment_id = record.first.alignment_id;
  2531. new_style.d_->border_applied = record.first.border_applied;
  2532. new_style.d_->border_id = record.first.border_id;
  2533. new_style.d_->fill_applied = record.first.fill_applied;
  2534. new_style.d_->fill_id = record.first.fill_id;
  2535. new_style.d_->font_applied = record.first.font_applied;
  2536. new_style.d_->font_id = record.first.font_id;
  2537. new_style.d_->number_format_applied = record.first.number_format_applied;
  2538. new_style.d_->number_format_id = record.first.number_format_id;
  2539. }
  2540. std::size_t record_index = 0;
  2541. for (const auto &record : format_records)
  2542. {
  2543. stylesheet.format_impls.push_back(format_impl());
  2544. auto &new_format = stylesheet.format_impls.back();
  2545. new_format.id = record_index++;
  2546. new_format.parent = &stylesheet;
  2547. ++new_format.references;
  2548. new_format.alignment_id = record.first.alignment_id;
  2549. new_format.alignment_applied = record.first.alignment_applied;
  2550. new_format.border_id = record.first.border_id;
  2551. new_format.border_applied = record.first.border_applied;
  2552. new_format.fill_id = record.first.fill_id;
  2553. new_format.fill_applied = record.first.fill_applied;
  2554. new_format.font_id = record.first.font_id;
  2555. new_format.font_applied = record.first.font_applied;
  2556. new_format.number_format_id = record.first.number_format_id;
  2557. new_format.number_format_applied = record.first.number_format_applied;
  2558. new_format.protection_id = record.first.protection_id;
  2559. new_format.protection_applied = record.first.protection_applied;
  2560. new_format.pivot_button_ = record.first.pivot_button_;
  2561. new_format.quote_prefix_ = record.first.quote_prefix_;
  2562. set_style_by_xfid(styles, record.second, new_format.style);
  2563. }
  2564. }
  2565. void xlsx_consumer::read_theme()
  2566. {
  2567. auto workbook_rel = manifest().relationship(path("/"),
  2568. relationship_type::office_document);
  2569. auto theme_rel = manifest().relationship(workbook_rel.target().path(),
  2570. relationship_type::theme);
  2571. auto theme_path = manifest().canonicalize({workbook_rel, theme_rel});
  2572. target_.theme(theme());
  2573. if (manifest().has_relationship(theme_path, relationship_type::image))
  2574. {
  2575. read_part({workbook_rel, theme_rel,
  2576. manifest().relationship(theme_path,
  2577. relationship_type::image)});
  2578. }
  2579. }
  2580. void xlsx_consumer::read_volatile_dependencies()
  2581. {
  2582. }
  2583. // Sheet Relationship Target Parts
  2584. void xlsx_consumer::read_vml_drawings(worksheet /*ws*/)
  2585. {
  2586. }
  2587. void xlsx_consumer::read_comments(worksheet ws)
  2588. {
  2589. std::vector<std::string> authors;
  2590. expect_start_element(qn("spreadsheetml", "comments"), xml::content::complex);
  2591. // name space can be ignored
  2592. skip_attribute(qn("mc", "Ignorable"));
  2593. expect_start_element(qn("spreadsheetml", "authors"), xml::content::complex);
  2594. while (in_element(qn("spreadsheetml", "authors")))
  2595. {
  2596. expect_start_element(qn("spreadsheetml", "author"), xml::content::simple);
  2597. authors.push_back(read_text());
  2598. expect_end_element(qn("spreadsheetml", "author"));
  2599. }
  2600. expect_end_element(qn("spreadsheetml", "authors"));
  2601. expect_start_element(qn("spreadsheetml", "commentList"), xml::content::complex);
  2602. while (in_element(xml::qname(qn("spreadsheetml", "commentList"))))
  2603. {
  2604. expect_start_element(qn("spreadsheetml", "comment"), xml::content::complex);
  2605. skip_attribute("shapeId");
  2606. auto cell_ref = parser().attribute("ref");
  2607. auto author_id = parser().attribute<std::size_t>("authorId");
  2608. expect_start_element(qn("spreadsheetml", "text"), xml::content::complex);
  2609. ws.cell(cell_ref).comment(comment(read_rich_text(qn("spreadsheetml", "text")), authors.at(author_id)));
  2610. expect_end_element(qn("spreadsheetml", "text"));
  2611. if (in_element(xml::qname(qn("spreadsheetml", "comment"))))
  2612. {
  2613. expect_start_element(qn("mc", "AlternateContent"), xml::content::complex);
  2614. skip_remaining_content(qn("mc", "AlternateContent"));
  2615. expect_end_element(qn("mc", "AlternateContent"));
  2616. }
  2617. expect_end_element(qn("spreadsheetml", "comment"));
  2618. }
  2619. expect_end_element(qn("spreadsheetml", "commentList"));
  2620. expect_end_element(qn("spreadsheetml", "comments"));
  2621. }
  2622. void xlsx_consumer::read_drawings(worksheet ws, const path &part)
  2623. {
  2624. auto images = manifest().relationships(part, relationship_type::image);
  2625. auto sd = drawing::spreadsheet_drawing(parser());
  2626. for (const auto &image_rel_id : sd.get_embed_ids())
  2627. {
  2628. auto image_rel = std::find_if(images.begin(), images.end(),
  2629. [&](const relationship &r) { return r.id() == image_rel_id; });
  2630. if (image_rel != images.end())
  2631. {
  2632. const auto url = image_rel->target().path().resolve(part.parent());
  2633. read_image(url);
  2634. }
  2635. }
  2636. ws.d_->drawing_ = sd;
  2637. }
  2638. // Unknown Parts
  2639. void xlsx_consumer::read_unknown_parts()
  2640. {
  2641. }
  2642. void xlsx_consumer::read_unknown_relationships()
  2643. {
  2644. }
  2645. void xlsx_consumer::read_image(const xlnt::path &image_path)
  2646. {
  2647. auto image_streambuf = archive_->open(image_path);
  2648. vector_ostreambuf buffer(target_.d_->images_[image_path.string()]);
  2649. std::ostream out_stream(&buffer);
  2650. out_stream << image_streambuf.get();
  2651. }
  2652. void xlsx_consumer::read_binary(const xlnt::path &binary_path)
  2653. {
  2654. auto binary_streambuf = archive_->open(binary_path);
  2655. vector_ostreambuf buffer(target_.d_->binaries_[binary_path.string()]);
  2656. std::ostream out_stream(&buffer);
  2657. out_stream << binary_streambuf.get();
  2658. }
  2659. std::string xlsx_consumer::read_text()
  2660. {
  2661. auto text = std::string();
  2662. while (parser().peek() == xml::parser::event_type::characters)
  2663. {
  2664. parser().next_expect(xml::parser::event_type::characters);
  2665. text.append(parser().value());
  2666. }
  2667. return text;
  2668. }
  2669. variant xlsx_consumer::read_variant()
  2670. {
  2671. auto value = variant(read_text());
  2672. if (in_element(stack_.back()))
  2673. {
  2674. auto element = expect_start_element(xml::content::mixed);
  2675. auto text = read_text();
  2676. if (element == qn("vt", "lpwstr") || element == qn("vt", "lpstr"))
  2677. {
  2678. value = variant(text);
  2679. }
  2680. if (element == qn("vt", "i4"))
  2681. {
  2682. value = variant(std::stoi(text));
  2683. }
  2684. if (element == qn("vt", "bool"))
  2685. {
  2686. value = variant(is_true(text));
  2687. }
  2688. else if (element == qn("vt", "vector"))
  2689. {
  2690. auto size = parser().attribute<std::size_t>("size");
  2691. auto base_type = parser().attribute("baseType");
  2692. std::vector<variant> vector;
  2693. for (auto i = std::size_t(0); i < size; ++i)
  2694. {
  2695. if (base_type == "variant")
  2696. {
  2697. expect_start_element(qn("vt", "variant"), xml::content::complex);
  2698. }
  2699. vector.push_back(read_variant());
  2700. if (base_type == "variant")
  2701. {
  2702. expect_end_element(qn("vt", "variant"));
  2703. read_text();
  2704. }
  2705. }
  2706. value = variant(vector);
  2707. }
  2708. expect_end_element(element);
  2709. read_text();
  2710. }
  2711. return value;
  2712. }
  2713. void xlsx_consumer::skip_attributes(const std::vector<std::string> &names)
  2714. {
  2715. for (const auto &name : names)
  2716. {
  2717. if (parser().attribute_present(name))
  2718. {
  2719. parser().attribute(name);
  2720. }
  2721. }
  2722. }
  2723. void xlsx_consumer::skip_attributes(const std::vector<xml::qname> &names)
  2724. {
  2725. for (const auto &name : names)
  2726. {
  2727. if (parser().attribute_present(name))
  2728. {
  2729. parser().attribute(name);
  2730. }
  2731. }
  2732. }
  2733. void xlsx_consumer::skip_attributes()
  2734. {
  2735. parser().attribute_map();
  2736. }
  2737. void xlsx_consumer::skip_attribute(const xml::qname &name)
  2738. {
  2739. if (parser().attribute_present(name))
  2740. {
  2741. parser().attribute(name);
  2742. }
  2743. }
  2744. void xlsx_consumer::skip_attribute(const std::string &name)
  2745. {
  2746. if (parser().attribute_present(name))
  2747. {
  2748. parser().attribute(name);
  2749. }
  2750. }
  2751. void xlsx_consumer::skip_remaining_content(const xml::qname &name)
  2752. {
  2753. // start by assuming we've already parsed the opening tag
  2754. skip_attributes();
  2755. read_text();
  2756. // continue until the closing tag is reached
  2757. while (in_element(name))
  2758. {
  2759. auto child_element = expect_start_element(xml::content::mixed);
  2760. skip_remaining_content(child_element);
  2761. expect_end_element(child_element);
  2762. read_text(); // trailing character content (usually whitespace)
  2763. }
  2764. }
  2765. bool xlsx_consumer::in_element(const xml::qname &name)
  2766. {
  2767. return parser().peek() != xml::parser::event_type::end_element
  2768. && stack_.back() == name;
  2769. }
  2770. xml::qname xlsx_consumer::expect_start_element(xml::content content)
  2771. {
  2772. parser().next_expect(xml::parser::event_type::start_element);
  2773. parser().content(content);
  2774. stack_.push_back(parser().qname());
  2775. const auto xml_space = qn("xml", "space");
  2776. preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
  2777. return stack_.back();
  2778. }
  2779. void xlsx_consumer::expect_start_element(const xml::qname &name, xml::content content)
  2780. {
  2781. parser().next_expect(xml::parser::event_type::start_element, name);
  2782. parser().content(content);
  2783. stack_.push_back(name);
  2784. const auto xml_space = qn("xml", "space");
  2785. preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
  2786. }
  2787. void xlsx_consumer::expect_end_element(const xml::qname &name)
  2788. {
  2789. parser().attribute_map();
  2790. parser().next_expect(xml::parser::event_type::end_element, name);
  2791. stack_.pop_back();
  2792. }
  2793. void xlsx_consumer::unexpected_element(const xml::qname &name)
  2794. {
  2795. #ifdef THROW_ON_INVALID_XML
  2796. throw xlnt::exception(name.string());
  2797. #else
  2798. skip_remaining_content(name);
  2799. #endif
  2800. }
  2801. rich_text xlsx_consumer::read_rich_text(const xml::qname &parent)
  2802. {
  2803. const auto &xmlns = parent.namespace_();
  2804. rich_text t;
  2805. while (in_element(parent))
  2806. {
  2807. auto text_element = expect_start_element(xml::content::mixed);
  2808. const auto xml_space = qn("xml", "space");
  2809. const auto preserve_space = parser().attribute_present(xml_space)
  2810. ? parser().attribute(xml_space) == "preserve"
  2811. : false;
  2812. skip_attributes();
  2813. auto text = read_text();
  2814. if (text_element == xml::qname(xmlns, "t"))
  2815. {
  2816. t.plain_text(text, preserve_space);
  2817. }
  2818. else if (text_element == xml::qname(xmlns, "r"))
  2819. {
  2820. rich_text_run run;
  2821. run.preserve_space = preserve_space;
  2822. while (in_element(xml::qname(xmlns, "r")))
  2823. {
  2824. auto run_element = expect_start_element(xml::content::mixed);
  2825. auto run_text = read_text();
  2826. if (run_element == xml::qname(xmlns, "rPr"))
  2827. {
  2828. run.second = xlnt::font();
  2829. while (in_element(xml::qname(xmlns, "rPr")))
  2830. {
  2831. auto current_run_property_element = expect_start_element(xml::content::simple);
  2832. if (current_run_property_element == xml::qname(xmlns, "sz"))
  2833. {
  2834. run.second.get().size(converter_.deserialise(parser().attribute("val")));
  2835. }
  2836. else if (current_run_property_element == xml::qname(xmlns, "rFont"))
  2837. {
  2838. run.second.get().name(parser().attribute("val"));
  2839. }
  2840. else if (current_run_property_element == xml::qname(xmlns, "color"))
  2841. {
  2842. run.second.get().color(read_color());
  2843. }
  2844. else if (current_run_property_element == xml::qname(xmlns, "family"))
  2845. {
  2846. run.second.get().family(parser().attribute<std::size_t>("val"));
  2847. }
  2848. else if (current_run_property_element == xml::qname(xmlns, "charset"))
  2849. {
  2850. run.second.get().charset(parser().attribute<std::size_t>("val"));
  2851. }
  2852. else if (current_run_property_element == xml::qname(xmlns, "scheme"))
  2853. {
  2854. run.second.get().scheme(parser().attribute("val"));
  2855. }
  2856. else if (current_run_property_element == xml::qname(xmlns, "b"))
  2857. {
  2858. run.second.get().bold(parser().attribute_present("val")
  2859. ? is_true(parser().attribute("val"))
  2860. : true);
  2861. }
  2862. else if (current_run_property_element == xml::qname(xmlns, "i"))
  2863. {
  2864. run.second.get().italic(parser().attribute_present("val")
  2865. ? is_true(parser().attribute("val"))
  2866. : true);
  2867. }
  2868. else if (current_run_property_element == xml::qname(xmlns, "u"))
  2869. {
  2870. if (parser().attribute_present("val"))
  2871. {
  2872. run.second.get().underline(parser().attribute<font::underline_style>("val"));
  2873. }
  2874. else
  2875. {
  2876. run.second.get().underline(font::underline_style::single);
  2877. }
  2878. }
  2879. else if (current_run_property_element == xml::qname(xmlns, "strike"))
  2880. {
  2881. run.second.get().strikethrough(parser().attribute_present("val")
  2882. ? is_true(parser().attribute("val"))
  2883. : true);
  2884. }
  2885. else
  2886. {
  2887. unexpected_element(current_run_property_element);
  2888. }
  2889. expect_end_element(current_run_property_element);
  2890. read_text();
  2891. }
  2892. }
  2893. else if (run_element == xml::qname(xmlns, "t"))
  2894. {
  2895. run.first = run_text;
  2896. }
  2897. else
  2898. {
  2899. unexpected_element(run_element);
  2900. }
  2901. read_text();
  2902. expect_end_element(run_element);
  2903. read_text();
  2904. }
  2905. t.add_run(run);
  2906. }
  2907. else if (text_element == xml::qname(xmlns, "rPh"))
  2908. {
  2909. phonetic_run pr;
  2910. pr.start = parser().attribute<std::uint32_t>("sb");
  2911. pr.end = parser().attribute<std::uint32_t>("eb");
  2912. expect_start_element(xml::qname(xmlns, "t"), xml::content::simple);
  2913. pr.text = read_text();
  2914. if (parser().attribute_present(xml_space))
  2915. {
  2916. pr.preserve_space = parser().attribute(xml_space) == "preserve";
  2917. }
  2918. expect_end_element(xml::qname(xmlns, "t"));
  2919. t.add_phonetic_run(pr);
  2920. }
  2921. else if (text_element == xml::qname(xmlns, "phoneticPr"))
  2922. {
  2923. phonetic_pr ph(parser().attribute<phonetic_pr::font_id_t>("fontId"));
  2924. if (parser().attribute_present("type"))
  2925. {
  2926. ph.type(phonetic_pr::type_from_string(parser().attribute("type")));
  2927. }
  2928. if (parser().attribute_present("alignment"))
  2929. {
  2930. ph.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
  2931. }
  2932. t.phonetic_properties(ph);
  2933. }
  2934. else
  2935. {
  2936. unexpected_element(text_element);
  2937. }
  2938. read_text();
  2939. expect_end_element(text_element);
  2940. }
  2941. return t;
  2942. }
  2943. xlnt::color xlsx_consumer::read_color()
  2944. {
  2945. xlnt::color result;
  2946. if (parser().attribute_present("auto") && is_true(parser().attribute("auto")))
  2947. {
  2948. result.auto_(true);
  2949. return result;
  2950. }
  2951. if (parser().attribute_present("rgb"))
  2952. {
  2953. result = xlnt::rgb_color(parser().attribute("rgb"));
  2954. }
  2955. else if (parser().attribute_present("theme"))
  2956. {
  2957. result = xlnt::theme_color(parser().attribute<std::size_t>("theme"));
  2958. }
  2959. else if (parser().attribute_present("indexed"))
  2960. {
  2961. result = xlnt::indexed_color(parser().attribute<std::size_t>("indexed"));
  2962. }
  2963. if (parser().attribute_present("tint"))
  2964. {
  2965. result.tint(converter_.deserialise(parser().attribute("tint")));
  2966. }
  2967. return result;
  2968. }
  2969. manifest &xlsx_consumer::manifest()
  2970. {
  2971. return target_.manifest();
  2972. }
  2973. } // namespace detail
  2974. } // namespace xlnt