compound_document.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364
  1. // Copyright (C) 2016-2021 Thomas Fussell
  2. // Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions
  6. // are met:
  7. //
  8. // 1. Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // 2. Redistributions in binary form must reproduce the above copyright
  11. // notice, this list of conditions and the following disclaimer in the
  12. // documentation and/or other materials provided with the distribution.
  13. //
  14. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  15. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. #include <algorithm>
  25. #include <array>
  26. #include <cstring>
  27. #include <iostream>
  28. #include <locale>
  29. #include <string>
  30. #include <vector>
  31. #include <xlnt/utils/exceptions.hpp>
  32. #include <detail/binary.hpp>
  33. #include <detail/cryptography/compound_document.hpp>
  34. #include <detail/unicode.hpp>
  35. namespace {
  36. using namespace xlnt::detail;
  37. int compare_keys(const std::string &left, const std::string &right)
  38. {
  39. auto to_lower = [](std::string s) {
  40. static const auto locale = std::locale();
  41. std::use_facet<std::ctype<char>>(locale).tolower(&s[0], &s[0] + s.size());
  42. return s;
  43. };
  44. return to_lower(left).compare(to_lower(right));
  45. }
  46. std::vector<std::string> split_path(const std::string &path)
  47. {
  48. auto split = std::vector<std::string>();
  49. auto current = path.find('/');
  50. auto prev = std::size_t(0);
  51. while (current != std::string::npos)
  52. {
  53. split.push_back(path.substr(prev, current - prev));
  54. prev = current + 1;
  55. current = path.find('/', prev);
  56. }
  57. split.push_back(path.substr(prev));
  58. return split;
  59. }
  60. std::string join_path(const std::vector<std::string> &path)
  61. {
  62. auto joined = std::string();
  63. for (auto part : path)
  64. {
  65. joined.append(part);
  66. joined.push_back('/');
  67. }
  68. return joined;
  69. }
  70. const sector_id FreeSector = -1;
  71. const sector_id EndOfChain = -2;
  72. const sector_id SATSector = -3;
  73. //const sector_id MSATSector = -4;
  74. const directory_id End = -1;
  75. } // namespace
  76. namespace xlnt {
  77. namespace detail {
  78. /// <summary>
  79. /// Allows a std::vector to be read through a std::istream.
  80. /// </summary>
  81. class compound_document_istreambuf : public std::streambuf
  82. {
  83. using int_type = std::streambuf::int_type;
  84. public:
  85. compound_document_istreambuf(const compound_document_entry &entry, compound_document &document)
  86. : entry_(entry),
  87. document_(document),
  88. sector_writer_(current_sector_),
  89. position_(0)
  90. {
  91. }
  92. compound_document_istreambuf(const compound_document_istreambuf &) = delete;
  93. compound_document_istreambuf &operator=(const compound_document_istreambuf &) = delete;
  94. ~compound_document_istreambuf() override;
  95. private:
  96. std::streamsize xsgetn(char *c, std::streamsize count) override
  97. {
  98. auto bytes_read = std::streamsize(0);
  99. const auto sector_chain = short_stream() ? document_.ssat_ : document_.sat_;
  100. const auto chain = document_.follow_chain(entry_.start, sector_chain);
  101. const auto sector_size = short_stream() ? document_.short_sector_size() : document_.sector_size();
  102. auto current_sector = chain[position_ / sector_size];
  103. auto remaining = std::min(std::size_t(entry_.size) - position_, std::size_t(count));
  104. while (remaining)
  105. {
  106. if (current_sector_.empty() || chain[position_ / sector_size] != current_sector)
  107. {
  108. current_sector = chain[position_ / sector_size];
  109. sector_writer_.reset();
  110. if (short_stream())
  111. {
  112. document_.read_short_sector(current_sector, sector_writer_);
  113. }
  114. else
  115. {
  116. document_.read_sector(current_sector, sector_writer_);
  117. }
  118. }
  119. const auto available = std::min(entry_.size - position_,
  120. sector_size - position_ % sector_size);
  121. const auto to_read = std::min(available, std::size_t(remaining));
  122. auto start = current_sector_.begin() + static_cast<std::ptrdiff_t>(position_ % sector_size);
  123. auto end = start + static_cast<std::ptrdiff_t>(to_read);
  124. for (auto i = start; i < end; ++i)
  125. {
  126. *(c++) = static_cast<char>(*i);
  127. }
  128. remaining -= to_read;
  129. position_ += to_read;
  130. bytes_read += to_read;
  131. }
  132. if (position_ < entry_.size && chain[position_ / sector_size] != current_sector)
  133. {
  134. current_sector = chain[position_ / sector_size];
  135. sector_writer_.reset();
  136. if (short_stream())
  137. {
  138. document_.read_short_sector(current_sector, sector_writer_);
  139. }
  140. else
  141. {
  142. document_.read_sector(current_sector, sector_writer_);
  143. }
  144. }
  145. return bytes_read;
  146. }
  147. bool short_stream()
  148. {
  149. return entry_.size < document_.header_.threshold;
  150. }
  151. int_type underflow() override
  152. {
  153. if (position_ >= entry_.size)
  154. {
  155. return traits_type::eof();
  156. }
  157. auto old_position = position_;
  158. auto result = '\0';
  159. xsgetn(&result, 1);
  160. position_ = old_position;
  161. return result;
  162. }
  163. int_type uflow() override
  164. {
  165. auto result = underflow();
  166. ++position_;
  167. return result;
  168. }
  169. std::streamsize showmanyc() override
  170. {
  171. if (position_ == entry_.size)
  172. {
  173. return static_cast<std::streamsize>(-1);
  174. }
  175. return static_cast<std::streamsize>(entry_.size - position_);
  176. }
  177. std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
  178. {
  179. if (way == std::ios_base::beg)
  180. {
  181. position_ = 0;
  182. }
  183. else if (way == std::ios_base::end)
  184. {
  185. position_ = entry_.size;
  186. }
  187. if (off < 0)
  188. {
  189. if (static_cast<std::size_t>(-off) > position_)
  190. {
  191. position_ = 0;
  192. return static_cast<std::ptrdiff_t>(-1);
  193. }
  194. else
  195. {
  196. position_ -= static_cast<std::size_t>(-off);
  197. }
  198. }
  199. else if (off > 0)
  200. {
  201. if (static_cast<std::size_t>(off) + position_ > entry_.size)
  202. {
  203. position_ = entry_.size;
  204. return static_cast<std::ptrdiff_t>(-1);
  205. }
  206. else
  207. {
  208. position_ += static_cast<std::size_t>(off);
  209. }
  210. }
  211. return static_cast<std::ptrdiff_t>(position_);
  212. }
  213. std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
  214. {
  215. if (sp < 0)
  216. {
  217. position_ = 0;
  218. }
  219. else if (static_cast<std::size_t>(sp) > entry_.size)
  220. {
  221. position_ = entry_.size;
  222. }
  223. else
  224. {
  225. position_ = static_cast<std::size_t>(sp);
  226. }
  227. return static_cast<std::ptrdiff_t>(position_);
  228. }
  229. private:
  230. const compound_document_entry &entry_;
  231. compound_document &document_;
  232. binary_writer<byte> sector_writer_;
  233. std::vector<byte> current_sector_;
  234. std::size_t position_;
  235. };
  236. compound_document_istreambuf::~compound_document_istreambuf()
  237. {
  238. }
  239. /// <summary>
  240. /// Allows a std::vector to be written through a std::ostream.
  241. /// </summary>
  242. class compound_document_ostreambuf : public std::streambuf
  243. {
  244. using int_type = std::streambuf::int_type;
  245. public:
  246. compound_document_ostreambuf(compound_document_entry &entry, compound_document &document)
  247. : entry_(entry),
  248. document_(document),
  249. sector_reader_(current_sector_),
  250. current_sector_(document.header_.threshold),
  251. position_(0)
  252. {
  253. setp(reinterpret_cast<char *>(current_sector_.data()),
  254. reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
  255. }
  256. compound_document_ostreambuf(const compound_document_ostreambuf &) = delete;
  257. compound_document_ostreambuf &operator=(const compound_document_ostreambuf &) = delete;
  258. ~compound_document_ostreambuf() override;
  259. private:
  260. int sync() override
  261. {
  262. auto written = static_cast<std::size_t>(pptr() - pbase());
  263. if (written == std::size_t(0))
  264. {
  265. return 0;
  266. }
  267. sector_reader_.reset();
  268. if (short_stream())
  269. {
  270. if (position_ + written >= document_.header_.threshold)
  271. {
  272. convert_to_long_stream();
  273. }
  274. else
  275. {
  276. if (entry_.start < 0)
  277. {
  278. auto num_sectors = (position_ + written + document_.short_sector_size() - 1) / document_.short_sector_size();
  279. chain_ = document_.allocate_short_sectors(num_sectors);
  280. entry_.start = chain_.front();
  281. }
  282. for (auto link : chain_)
  283. {
  284. document_.write_short_sector(sector_reader_, link);
  285. sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
  286. }
  287. }
  288. }
  289. else
  290. {
  291. const auto sector_index = position_ / document_.sector_size();
  292. document_.write_sector(sector_reader_, chain_[sector_index]);
  293. }
  294. position_ += written;
  295. entry_.size = std::max(entry_.size, static_cast<std::uint32_t>(position_));
  296. document_.write_directory();
  297. std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
  298. setp(reinterpret_cast<char *>(current_sector_.data()),
  299. reinterpret_cast<char *>(current_sector_.data() + current_sector_.size()));
  300. return 0;
  301. }
  302. bool short_stream()
  303. {
  304. return entry_.size < document_.header_.threshold;
  305. }
  306. int_type overflow(int_type c = traits_type::eof()) override
  307. {
  308. sync();
  309. if (short_stream())
  310. {
  311. auto next_sector = document_.allocate_short_sector();
  312. document_.ssat_[static_cast<std::size_t>(chain_.back())] = next_sector;
  313. chain_.push_back(next_sector);
  314. document_.write_ssat();
  315. }
  316. else
  317. {
  318. auto next_sector = document_.allocate_sector();
  319. document_.sat_[static_cast<std::size_t>(chain_.back())] = next_sector;
  320. chain_.push_back(next_sector);
  321. document_.write_sat();
  322. }
  323. auto value = static_cast<std::uint8_t>(c);
  324. if (c != traits_type::eof())
  325. {
  326. current_sector_[position_ % current_sector_.size()] = value;
  327. }
  328. pbump(1);
  329. return traits_type::to_int_type(static_cast<char>(value));
  330. }
  331. void convert_to_long_stream()
  332. {
  333. sector_reader_.reset();
  334. auto num_sectors = current_sector_.size() / document_.sector_size();
  335. auto new_chain = document_.allocate_sectors(num_sectors);
  336. for (auto link : new_chain)
  337. {
  338. document_.write_sector(sector_reader_, link);
  339. sector_reader_.offset(sector_reader_.offset() + document_.short_sector_size());
  340. }
  341. current_sector_.resize(document_.sector_size(), 0);
  342. std::fill(current_sector_.begin(), current_sector_.end(), byte(0));
  343. if (entry_.start < 0)
  344. {
  345. // TODO: deallocate short sectors here
  346. if (document_.header_.num_short_sectors == 0)
  347. {
  348. document_.entries_[0].start = EndOfChain;
  349. }
  350. }
  351. chain_ = new_chain;
  352. entry_.start = chain_.front();
  353. document_.write_directory();
  354. }
  355. std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode) override
  356. {
  357. if (way == std::ios_base::beg)
  358. {
  359. position_ = 0;
  360. }
  361. else if (way == std::ios_base::end)
  362. {
  363. position_ = entry_.size;
  364. }
  365. if (off < 0)
  366. {
  367. if (static_cast<std::size_t>(-off) > position_)
  368. {
  369. position_ = 0;
  370. return static_cast<std::ptrdiff_t>(-1);
  371. }
  372. else
  373. {
  374. position_ -= static_cast<std::size_t>(-off);
  375. }
  376. }
  377. else if (off > 0)
  378. {
  379. if (static_cast<std::size_t>(off) + position_ > entry_.size)
  380. {
  381. position_ = entry_.size;
  382. return static_cast<std::ptrdiff_t>(-1);
  383. }
  384. else
  385. {
  386. position_ += static_cast<std::size_t>(off);
  387. }
  388. }
  389. return static_cast<std::ptrdiff_t>(position_);
  390. }
  391. std::streampos seekpos(std::streampos sp, std::ios_base::openmode) override
  392. {
  393. if (sp < 0)
  394. {
  395. position_ = 0;
  396. }
  397. else if (static_cast<std::size_t>(sp) > entry_.size)
  398. {
  399. position_ = entry_.size;
  400. }
  401. else
  402. {
  403. position_ = static_cast<std::size_t>(sp);
  404. }
  405. return static_cast<std::ptrdiff_t>(position_);
  406. }
  407. private:
  408. compound_document_entry &entry_;
  409. compound_document &document_;
  410. binary_reader<byte> sector_reader_;
  411. std::vector<byte> current_sector_;
  412. std::size_t position_;
  413. sector_chain chain_;
  414. };
  415. compound_document_ostreambuf::~compound_document_ostreambuf()
  416. {
  417. sync();
  418. }
  419. compound_document::compound_document(std::ostream &out)
  420. : out_(&out),
  421. stream_in_(nullptr),
  422. stream_out_(nullptr)
  423. {
  424. header_.msat.fill(FreeSector);
  425. write_header();
  426. insert_entry("/Root Entry", compound_document_entry::entry_type::RootStorage);
  427. }
  428. compound_document::compound_document(std::istream &in)
  429. : in_(&in),
  430. stream_in_(nullptr),
  431. stream_out_(nullptr)
  432. {
  433. read_header();
  434. read_msat();
  435. read_sat();
  436. read_ssat();
  437. read_directory();
  438. }
  439. compound_document::~compound_document()
  440. {
  441. close();
  442. }
  443. void compound_document::close()
  444. {
  445. stream_out_buffer_.reset(nullptr);
  446. }
  447. std::size_t compound_document::sector_size()
  448. {
  449. return static_cast<std::size_t>(1) << header_.sector_size_power;
  450. }
  451. std::size_t compound_document::short_sector_size()
  452. {
  453. return static_cast<std::size_t>(1) << header_.short_sector_size_power;
  454. }
  455. std::istream &compound_document::open_read_stream(const std::string &name)
  456. {
  457. if (!contains_entry(name, compound_document_entry::entry_type::UserStream))
  458. {
  459. throw xlnt::exception("not found");
  460. }
  461. const auto entry_id = find_entry(name, compound_document_entry::entry_type::UserStream);
  462. const auto &entry = entries_.at(static_cast<std::size_t>(entry_id));
  463. stream_in_buffer_.reset(new compound_document_istreambuf(entry, *this));
  464. stream_in_.rdbuf(stream_in_buffer_.get());
  465. return stream_in_;
  466. }
  467. std::ostream &compound_document::open_write_stream(const std::string &name)
  468. {
  469. auto entry_id = contains_entry(name, compound_document_entry::entry_type::UserStream)
  470. ? find_entry(name, compound_document_entry::entry_type::UserStream)
  471. : insert_entry(name, compound_document_entry::entry_type::UserStream);
  472. auto &entry = entries_.at(static_cast<std::size_t>(entry_id));
  473. stream_out_buffer_.reset(new compound_document_ostreambuf(entry, *this));
  474. stream_out_.rdbuf(stream_out_buffer_.get());
  475. return stream_out_;
  476. }
  477. template <typename T>
  478. void compound_document::write_sector(binary_reader<T> &reader, sector_id id)
  479. {
  480. out_->seekp(static_cast<std::ptrdiff_t>(sector_data_start() + sector_size() * static_cast<std::size_t>(id)));
  481. out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
  482. static_cast<std::ptrdiff_t>(std::min(sector_size(), reader.bytes() - reader.offset())));
  483. }
  484. template <typename T>
  485. void compound_document::write_short_sector(binary_reader<T> &reader, sector_id id)
  486. {
  487. auto chain = follow_chain(entries_[0].start, sat_);
  488. auto sector_id = chain[static_cast<std::size_t>(id) / (sector_size() / short_sector_size())];
  489. auto sector_offset = static_cast<std::size_t>(id) % (sector_size() / short_sector_size()) * short_sector_size();
  490. out_->seekp(static_cast<std::ptrdiff_t>(sector_data_start() + sector_size() * static_cast<std::size_t>(sector_id) + sector_offset));
  491. out_->write(reinterpret_cast<const char *>(reader.data() + reader.offset()),
  492. static_cast<std::ptrdiff_t>(std::min(short_sector_size(), reader.bytes() - reader.offset())));
  493. }
  494. template <typename T>
  495. void compound_document::read_sector(sector_id id, binary_writer<T> &writer)
  496. {
  497. in_->seekg(static_cast<std::ptrdiff_t>(sector_data_start() + sector_size() * static_cast<std::size_t>(id)));
  498. std::vector<byte> sector(sector_size(), 0);
  499. in_->read(reinterpret_cast<char *>(sector.data()), static_cast<std::ptrdiff_t>(sector_size()));
  500. writer.append(sector);
  501. }
  502. template <typename T>
  503. void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer)
  504. {
  505. for (auto link : follow_chain(start, sat_))
  506. {
  507. read_sector(link, writer);
  508. }
  509. }
  510. template <typename T>
  511. void compound_document::read_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
  512. {
  513. auto chain = follow_chain(start, sat_);
  514. for (auto i = std::size_t(0); i < count; ++i)
  515. {
  516. read_sector(chain[offset + i], writer);
  517. }
  518. }
  519. template <typename T>
  520. void compound_document::read_short_sector(sector_id id, binary_writer<T> &writer)
  521. {
  522. const auto container_chain = follow_chain(entries_[0].start, sat_);
  523. auto container = std::vector<byte>();
  524. auto container_writer = binary_writer<byte>(container);
  525. for (auto sector : container_chain)
  526. {
  527. read_sector(sector, container_writer);
  528. }
  529. auto container_reader = binary_reader<byte>(container);
  530. container_reader.offset(static_cast<std::size_t>(id) * short_sector_size());
  531. writer.append(container_reader, short_sector_size());
  532. }
  533. template <typename T>
  534. void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer)
  535. {
  536. for (auto link : follow_chain(start, ssat_))
  537. {
  538. read_short_sector(link, writer);
  539. }
  540. }
  541. template <typename T>
  542. void compound_document::read_short_sector_chain(sector_id start, binary_writer<T> &writer, sector_id offset, std::size_t count)
  543. {
  544. auto chain = follow_chain(start, ssat_);
  545. for (auto i = std::size_t(0); i < count; ++i)
  546. {
  547. read_short_sector(chain[offset + i], writer);
  548. }
  549. }
  550. sector_id compound_document::allocate_sector()
  551. {
  552. const auto sectors_per_sector = sector_size() / sizeof(sector_id);
  553. auto next_free_iter = std::find(sat_.begin(), sat_.end(), FreeSector);
  554. if (next_free_iter == sat_.end())
  555. {
  556. auto next_msat_index = header_.num_msat_sectors;
  557. auto new_sat_sector_id = sector_id(sat_.size());
  558. msat_.push_back(new_sat_sector_id);
  559. write_msat();
  560. header_.msat[msat_.size() - 1] = new_sat_sector_id;
  561. ++header_.num_msat_sectors;
  562. write_header();
  563. sat_.resize(sat_.size() + sectors_per_sector, FreeSector);
  564. sat_[static_cast<std::size_t>(new_sat_sector_id)] = SATSector;
  565. auto sat_reader = binary_reader<sector_id>(sat_);
  566. sat_reader.offset(next_msat_index * sectors_per_sector);
  567. write_sector(sat_reader, new_sat_sector_id);
  568. next_free_iter = std::find(sat_.begin(), sat_.end(), FreeSector);
  569. }
  570. auto next_free = sector_id(next_free_iter - sat_.begin());
  571. sat_[static_cast<std::size_t>(next_free)] = EndOfChain;
  572. write_sat();
  573. auto empty_sector = std::vector<byte>(sector_size());
  574. auto empty_sector_reader = binary_reader<byte>(empty_sector);
  575. write_sector(empty_sector_reader, next_free);
  576. return next_free;
  577. }
  578. sector_chain compound_document::allocate_sectors(std::size_t count)
  579. {
  580. if (count == std::size_t(0)) return {};
  581. auto chain = sector_chain();
  582. auto current = allocate_sector();
  583. for (auto i = std::size_t(1); i < count; ++i)
  584. {
  585. chain.push_back(current);
  586. auto next = allocate_sector();
  587. sat_[static_cast<std::size_t>(current)] = next;
  588. current = next;
  589. }
  590. chain.push_back(current);
  591. write_sat();
  592. return chain;
  593. }
  594. sector_chain compound_document::follow_chain(sector_id start, const sector_chain &table)
  595. {
  596. auto chain = sector_chain();
  597. auto current = start;
  598. while (current >= 0)
  599. {
  600. chain.push_back(current);
  601. current = table[static_cast<std::size_t>(current)];
  602. }
  603. return chain;
  604. }
  605. sector_chain compound_document::allocate_short_sectors(std::size_t count)
  606. {
  607. if (count == std::size_t(0)) return {};
  608. auto chain = sector_chain();
  609. auto current = allocate_short_sector();
  610. for (auto i = std::size_t(1); i < count; ++i)
  611. {
  612. chain.push_back(current);
  613. auto next = allocate_short_sector();
  614. ssat_[static_cast<std::size_t>(current)] = next;
  615. current = next;
  616. }
  617. chain.push_back(current);
  618. write_ssat();
  619. return chain;
  620. }
  621. sector_id compound_document::allocate_short_sector()
  622. {
  623. const auto sectors_per_sector = sector_size() / sizeof(sector_id);
  624. auto next_free_iter = std::find(ssat_.begin(), ssat_.end(), FreeSector);
  625. if (next_free_iter == ssat_.end())
  626. {
  627. auto new_ssat_sector_id = allocate_sector();
  628. if (header_.ssat_start < 0)
  629. {
  630. header_.ssat_start = new_ssat_sector_id;
  631. }
  632. else
  633. {
  634. auto ssat_chain = follow_chain(header_.ssat_start, sat_);
  635. sat_[static_cast<std::size_t>(ssat_chain.back())] = new_ssat_sector_id;
  636. write_sat();
  637. }
  638. write_header();
  639. auto old_size = ssat_.size();
  640. ssat_.resize(old_size + sectors_per_sector, FreeSector);
  641. auto ssat_reader = binary_reader<sector_id>(ssat_);
  642. ssat_reader.offset(old_size / sectors_per_sector);
  643. write_sector(ssat_reader, new_ssat_sector_id);
  644. next_free_iter = std::find(ssat_.begin(), ssat_.end(), FreeSector);
  645. }
  646. ++header_.num_short_sectors;
  647. write_header();
  648. auto next_free = sector_id(next_free_iter - ssat_.begin());
  649. ssat_[static_cast<std::size_t>(next_free)] = EndOfChain;
  650. write_ssat();
  651. const auto short_sectors_per_sector = sector_size() / short_sector_size();
  652. const auto required_container_sectors = static_cast<std::size_t>(next_free) / short_sectors_per_sector + std::size_t(1);
  653. if (required_container_sectors > 0)
  654. {
  655. if (entries_[0].start < 0)
  656. {
  657. entries_[0].start = allocate_sector();
  658. write_entry(0);
  659. }
  660. auto container_chain = follow_chain(entries_[0].start, sat_);
  661. if (required_container_sectors > container_chain.size())
  662. {
  663. sat_[static_cast<std::size_t>(container_chain.back())] = allocate_sector();
  664. write_sat();
  665. }
  666. }
  667. return next_free;
  668. }
  669. directory_id compound_document::next_empty_entry()
  670. {
  671. auto entry_id = directory_id(0);
  672. for (; entry_id < directory_id(entries_.size()); ++entry_id)
  673. {
  674. auto &entry = entries_[static_cast<std::size_t>(entry_id)];
  675. if (entry.type == compound_document_entry::entry_type::Empty)
  676. {
  677. return entry_id;
  678. }
  679. }
  680. // entry_id is now equal to entries_.size()
  681. if (header_.directory_start < 0)
  682. {
  683. header_.directory_start = allocate_sector();
  684. }
  685. else
  686. {
  687. auto directory_chain = follow_chain(header_.directory_start, sat_);
  688. sat_[static_cast<std::size_t>(directory_chain.back())] = allocate_sector();
  689. write_sat();
  690. }
  691. const auto entries_per_sector = sector_size()
  692. / sizeof(compound_document_entry);
  693. for (auto i = std::size_t(0); i < entries_per_sector; ++i)
  694. {
  695. auto empty_entry = compound_document_entry();
  696. empty_entry.type = compound_document_entry::entry_type::Empty;
  697. entries_.push_back(empty_entry);
  698. write_entry(entry_id + directory_id(i));
  699. }
  700. return entry_id;
  701. }
  702. directory_id compound_document::insert_entry(
  703. const std::string &name,
  704. compound_document_entry::entry_type type)
  705. {
  706. auto entry_id = next_empty_entry();
  707. auto &entry = entries_[static_cast<std::size_t>(entry_id)];
  708. auto parent_id = directory_id(0);
  709. auto split = split_path(name);
  710. auto filename = split.back();
  711. split.pop_back();
  712. if (split.size() > 1)
  713. {
  714. parent_id = find_entry(join_path(split), compound_document_entry::entry_type::UserStorage);
  715. if (parent_id < 0)
  716. {
  717. throw xlnt::exception("bad path");
  718. }
  719. parent_storage_[entry_id] = parent_id;
  720. }
  721. entry.name(filename);
  722. entry.type = type;
  723. tree_insert(entry_id, parent_id);
  724. write_directory();
  725. return entry_id;
  726. }
  727. std::size_t compound_document::sector_data_start()
  728. {
  729. return sizeof(compound_document_header);
  730. }
  731. bool compound_document::contains_entry(const std::string &path,
  732. compound_document_entry::entry_type type)
  733. {
  734. return find_entry(path, type) >= 0;
  735. }
  736. directory_id compound_document::find_entry(const std::string &name,
  737. compound_document_entry::entry_type type)
  738. {
  739. if (type == compound_document_entry::entry_type::RootStorage
  740. && (name == "/" || name == "/Root Entry")) return 0;
  741. auto entry_id = directory_id(0);
  742. for (auto &entry : entries_)
  743. {
  744. if (entry.type == type && tree_path(entry_id) == name)
  745. {
  746. return entry_id;
  747. }
  748. ++entry_id;
  749. }
  750. return End;
  751. }
  752. void compound_document::print_directory()
  753. {
  754. auto entry_id = directory_id(0);
  755. for (auto &entry : entries_)
  756. {
  757. if (entry.type == compound_document_entry::entry_type::UserStream)
  758. {
  759. std::cout << tree_path(entry_id) << std::endl;
  760. }
  761. ++entry_id;
  762. }
  763. }
  764. void compound_document::write_directory()
  765. {
  766. for (auto entry_id = std::size_t(0); entry_id < entries_.size(); ++entry_id)
  767. {
  768. write_entry(directory_id(entry_id));
  769. }
  770. }
  771. void compound_document::read_directory()
  772. {
  773. const auto entries_per_sector = sector_size() / sizeof(compound_document_entry);
  774. const auto num_entries = follow_chain(header_.directory_start, sat_).size() * entries_per_sector;
  775. for (auto entry_id = std::size_t(0); entry_id < num_entries; ++entry_id)
  776. {
  777. entries_.push_back(compound_document_entry());
  778. read_entry(directory_id(entry_id));
  779. }
  780. auto stack = std::vector<directory_id>();
  781. auto storage_siblings = std::vector<directory_id>();
  782. auto stream_siblings = std::vector<directory_id>();
  783. auto directory_stack = std::vector<directory_id>();
  784. directory_stack.push_back(directory_id(0));
  785. while (!directory_stack.empty())
  786. {
  787. auto current_storage_id = directory_stack.back();
  788. directory_stack.pop_back();
  789. if (tree_child(current_storage_id) < 0) continue;
  790. auto storage_stack = std::vector<directory_id>();
  791. auto storage_root_id = tree_child(current_storage_id);
  792. parent_[storage_root_id] = End;
  793. storage_stack.push_back(storage_root_id);
  794. while (!storage_stack.empty())
  795. {
  796. auto current_entry_id = storage_stack.back();
  797. auto current_entry = entries_[static_cast<std::size_t>(current_entry_id)];
  798. storage_stack.pop_back();
  799. parent_storage_[current_entry_id] = current_storage_id;
  800. if (current_entry.type == compound_document_entry::entry_type::UserStorage)
  801. {
  802. directory_stack.push_back(current_entry_id);
  803. }
  804. if (tree_left(current_entry_id) >= 0)
  805. {
  806. storage_stack.push_back(tree_left(current_entry_id));
  807. tree_parent(tree_left(current_entry_id)) = current_entry_id;
  808. }
  809. if (tree_right(current_entry_id) >= 0)
  810. {
  811. storage_stack.push_back(tree_right(current_entry_id));
  812. tree_parent(tree_right(current_entry_id)) = current_entry_id;
  813. }
  814. }
  815. }
  816. }
  817. void compound_document::tree_insert(directory_id new_id, directory_id storage_id)
  818. {
  819. using entry_color = compound_document_entry::entry_color;
  820. parent_storage_[new_id] = storage_id;
  821. tree_left(new_id) = End;
  822. tree_right(new_id) = End;
  823. if (tree_root(new_id) == End)
  824. {
  825. if (new_id != 0)
  826. {
  827. tree_root(new_id) = new_id;
  828. }
  829. tree_color(new_id) = entry_color::Black;
  830. tree_parent(new_id) = End;
  831. return;
  832. }
  833. // normal tree insert
  834. // (will probably unbalance the tree, fix after)
  835. auto x = tree_root(new_id);
  836. auto y = End;
  837. while (x >= 0)
  838. {
  839. y = x;
  840. if (compare_keys(tree_key(new_id), tree_key(x)) > 0)
  841. {
  842. x = tree_right(x);
  843. }
  844. else
  845. {
  846. x = tree_left(x);
  847. }
  848. }
  849. tree_parent(new_id) = y;
  850. if (compare_keys(tree_key(new_id), tree_key(y)) > 0)
  851. {
  852. tree_right(y) = new_id;
  853. }
  854. else
  855. {
  856. tree_left(y) = new_id;
  857. }
  858. tree_insert_fixup(new_id);
  859. }
  860. std::string compound_document::tree_path(directory_id id)
  861. {
  862. auto storage_id = parent_storage_[id];
  863. auto result = std::vector<std::string>();
  864. while (storage_id > 0)
  865. {
  866. storage_id = parent_storage_[storage_id];
  867. result.push_back(entries_[static_cast<std::size_t>(storage_id)].name());
  868. }
  869. return "/" + join_path(result) + entries_[static_cast<std::size_t>(id)].name();
  870. }
  871. void compound_document::tree_rotate_left(directory_id x)
  872. {
  873. auto y = tree_right(x);
  874. // turn y's left subtree into x's right subtree
  875. tree_right(x) = tree_left(y);
  876. if (tree_left(y) != End)
  877. {
  878. tree_parent(tree_left(y)) = x;
  879. }
  880. // link x's parent to y
  881. tree_parent(y) = tree_parent(x);
  882. if (tree_parent(x) == End)
  883. {
  884. tree_root(x) = y;
  885. }
  886. else if (x == tree_left(tree_parent(x)))
  887. {
  888. tree_left(tree_parent(x)) = y;
  889. }
  890. else
  891. {
  892. tree_right(tree_parent(x)) = y;
  893. }
  894. // put x on y's left
  895. tree_left(y) = x;
  896. tree_parent(x) = y;
  897. }
  898. void compound_document::tree_rotate_right(directory_id y)
  899. {
  900. auto x = tree_left(y);
  901. // turn x's right subtree into y's left subtree
  902. tree_left(y) = tree_right(x);
  903. if (tree_right(x) != End)
  904. {
  905. tree_parent(tree_right(x)) = y;
  906. }
  907. // link y's parent to x
  908. tree_parent(x) = tree_parent(y);
  909. if (tree_parent(y) == End)
  910. {
  911. tree_root(y) = x;
  912. }
  913. else if (y == tree_left(tree_parent(y)))
  914. {
  915. tree_left(tree_parent(y)) = x;
  916. }
  917. else
  918. {
  919. tree_right(tree_parent(y)) = x;
  920. }
  921. // put y on x's right
  922. tree_right(x) = y;
  923. tree_parent(y) = x;
  924. }
  925. void compound_document::tree_insert_fixup(directory_id x)
  926. {
  927. using entry_color = compound_document_entry::entry_color;
  928. tree_color(x) = entry_color::Red;
  929. while (x != tree_root(x) && tree_color(tree_parent(x)) == entry_color::Red)
  930. {
  931. if (tree_parent(x) == tree_left(tree_parent(tree_parent(x))))
  932. {
  933. auto y = tree_right(tree_parent(tree_parent(x)));
  934. if (y >= 0 && tree_color(y) == entry_color::Red)
  935. {
  936. // case 1
  937. tree_color(tree_parent(x)) = entry_color::Black;
  938. tree_color(y) = entry_color::Black;
  939. tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
  940. x = tree_parent(tree_parent(x));
  941. }
  942. else
  943. {
  944. if (x == tree_right(tree_parent(x)))
  945. {
  946. // case 2
  947. x = tree_parent(x);
  948. tree_rotate_left(x);
  949. }
  950. // case 3
  951. tree_color(tree_parent(x)) = entry_color::Black;
  952. tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
  953. tree_rotate_right(tree_parent(tree_parent(x)));
  954. }
  955. }
  956. else // same as above with left and right switched
  957. {
  958. auto y = tree_left(tree_parent(tree_parent(x)));
  959. if (y >= 0 && tree_color(y) == entry_color::Red)
  960. {
  961. //case 1
  962. tree_color(tree_parent(x)) = entry_color::Black;
  963. tree_color(y) = entry_color::Black;
  964. tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
  965. x = tree_parent(tree_parent(x));
  966. }
  967. else
  968. {
  969. if (x == tree_left(tree_parent(x)))
  970. {
  971. // case 2
  972. x = tree_parent(x);
  973. tree_rotate_right(x);
  974. }
  975. // case 3
  976. tree_color(tree_parent(x)) = entry_color::Black;
  977. tree_color(tree_parent(tree_parent(x))) = entry_color::Red;
  978. tree_rotate_left(tree_parent(tree_parent(x)));
  979. }
  980. }
  981. }
  982. tree_color(tree_root(x)) = entry_color::Black;
  983. }
  984. directory_id &compound_document::tree_left(directory_id id)
  985. {
  986. return entries_[static_cast<std::size_t>(id)].prev;
  987. }
  988. directory_id &compound_document::tree_right(directory_id id)
  989. {
  990. return entries_[static_cast<std::size_t>(id)].next;
  991. }
  992. directory_id &compound_document::tree_parent(directory_id id)
  993. {
  994. return parent_[id];
  995. }
  996. directory_id &compound_document::tree_root(directory_id id)
  997. {
  998. return tree_child(parent_storage_[id]);
  999. }
  1000. directory_id &compound_document::tree_child(directory_id id)
  1001. {
  1002. return entries_[static_cast<std::size_t>(id)].child;
  1003. }
  1004. std::string compound_document::tree_key(directory_id id)
  1005. {
  1006. return entries_[static_cast<std::size_t>(id)].name();
  1007. }
  1008. compound_document_entry::entry_color &compound_document::tree_color(directory_id id)
  1009. {
  1010. return entries_[static_cast<std::size_t>(id)].color;
  1011. }
  1012. void compound_document::read_header()
  1013. {
  1014. in_->seekg(0, std::ios::beg);
  1015. in_->read(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
  1016. }
  1017. void compound_document::read_msat()
  1018. {
  1019. msat_.clear();
  1020. auto msat_sector = header_.extra_msat_start;
  1021. auto msat_writer = binary_writer<sector_id>(msat_);
  1022. for (auto i = std::uint32_t(0); i < header_.num_msat_sectors; ++i)
  1023. {
  1024. if (i < std::uint32_t(109))
  1025. {
  1026. msat_writer.write(header_.msat[i]);
  1027. }
  1028. else
  1029. {
  1030. read_sector(msat_sector, msat_writer);
  1031. msat_sector = msat_.back();
  1032. msat_.pop_back();
  1033. }
  1034. }
  1035. }
  1036. void compound_document::read_sat()
  1037. {
  1038. sat_.clear();
  1039. auto sat_writer = binary_writer<sector_id>(sat_);
  1040. for (auto msat_sector : msat_)
  1041. {
  1042. read_sector(msat_sector, sat_writer);
  1043. }
  1044. }
  1045. void compound_document::read_ssat()
  1046. {
  1047. ssat_.clear();
  1048. auto ssat_writer = binary_writer<sector_id>(ssat_);
  1049. for (auto ssat_sector : follow_chain(header_.ssat_start, sat_))
  1050. {
  1051. read_sector(ssat_sector, ssat_writer);
  1052. }
  1053. }
  1054. void compound_document::read_entry(directory_id id)
  1055. {
  1056. const auto directory_chain = follow_chain(header_.directory_start, sat_);
  1057. const auto entries_per_sector = sector_size() / sizeof(compound_document_entry);
  1058. const auto directory_sector = directory_chain[static_cast<std::size_t>(id) / entries_per_sector];
  1059. const auto offset = sector_size() * static_cast<std::size_t>(directory_sector)
  1060. + ((static_cast<std::size_t>(id) % entries_per_sector) * sizeof(compound_document_entry));
  1061. in_->seekg(static_cast<std::ptrdiff_t>(sector_data_start() + offset), std::ios::beg);
  1062. in_->read(reinterpret_cast<char *>(&entries_[static_cast<std::size_t>(id)]), sizeof(compound_document_entry));
  1063. }
  1064. void compound_document::write_header()
  1065. {
  1066. out_->seekp(0, std::ios::beg);
  1067. out_->write(reinterpret_cast<char *>(&header_), sizeof(compound_document_header));
  1068. }
  1069. void compound_document::write_msat()
  1070. {
  1071. auto msat_sector = header_.extra_msat_start;
  1072. for (auto i = std::uint32_t(0); i < header_.num_msat_sectors; ++i)
  1073. {
  1074. if (i < std::uint32_t(109))
  1075. {
  1076. header_.msat[i] = msat_[i];
  1077. }
  1078. else
  1079. {
  1080. auto sector = std::vector<sector_id>();
  1081. auto sector_writer = binary_writer<sector_id>(sector);
  1082. read_sector(msat_sector, sector_writer);
  1083. msat_sector = sector.back();
  1084. sector.pop_back();
  1085. std::copy(sector.begin(), sector.end(), std::back_inserter(msat_));
  1086. }
  1087. }
  1088. }
  1089. void compound_document::write_sat()
  1090. {
  1091. auto sector_reader = binary_reader<sector_id>(sat_);
  1092. for (auto sat_sector : msat_)
  1093. {
  1094. write_sector(sector_reader, sat_sector);
  1095. }
  1096. }
  1097. void compound_document::write_ssat()
  1098. {
  1099. auto sector_reader = binary_reader<sector_id>(ssat_);
  1100. for (auto ssat_sector : follow_chain(header_.ssat_start, sat_))
  1101. {
  1102. write_sector(sector_reader, ssat_sector);
  1103. }
  1104. }
  1105. void compound_document::write_entry(directory_id id)
  1106. {
  1107. const auto directory_chain = follow_chain(header_.directory_start, sat_);
  1108. const auto entries_per_sector = sector_size() / sizeof(compound_document_entry);
  1109. const auto directory_sector = directory_chain[static_cast<std::size_t>(id) / entries_per_sector];
  1110. const auto offset = sector_data_start() + sector_size() * static_cast<std::size_t>(directory_sector)
  1111. + ((static_cast<std::size_t>(id) % entries_per_sector) * sizeof(compound_document_entry));
  1112. out_->seekp(static_cast<std::ptrdiff_t>(offset), std::ios::beg);
  1113. out_->write(reinterpret_cast<char *>(&entries_[static_cast<std::size_t>(id)]), sizeof(compound_document_entry));
  1114. }
  1115. } // namespace detail
  1116. } // namespace xlnt