zstream.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. /*
  2. PARTIO SOFTWARE
  3. Copyright 2010 Disney Enterprises, Inc. All rights reserved
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. * Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. * Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
  14. Studios" or the names of its contributors may NOT be used to
  15. endorse or promote products derived from this software without
  16. specific prior written permission from Walt Disney Pictures.
  17. Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
  18. CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
  19. BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
  20. FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
  21. IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
  22. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  23. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  24. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  25. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
  26. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
  29. */
  30. #include <algorithm>
  31. #include <array>
  32. #include <cassert>
  33. #include <cstring>
  34. #include <fstream>
  35. #include <iomanip>
  36. #include <iostream>
  37. #include <iterator> // for std::back_inserter
  38. #include <stdexcept>
  39. #include <string>
  40. #include <miniz.h>
  41. #include <xlnt/utils/exceptions.hpp>
  42. #include <detail/serialization/vector_streambuf.hpp>
  43. #include <detail/serialization/zstream.hpp>
  44. namespace {
  /// Reads sizeof(T) raw bytes from `stream` and returns them as a T.
  ///
  /// The bytes are copied straight into the object representation of the
  /// result (no byte swapping is performed). The result is value-initialised
  /// first, so a failed read never returns an indeterminate value: when no
  /// bytes could be extracted the function returns T{} and the stream's
  /// failure state tells the caller what happened.
  template <class T>
  T read_int(std::istream &stream)
  {
      T value{};
      stream.read(reinterpret_cast<char *>(&value), sizeof(T));
      return value;
  }
  /// Writes the sizeof(T) bytes that make up `value` to `stream`, exactly as
  /// they are laid out in memory (no byte swapping is performed).
  template <class T>
  void write_int(std::ostream &stream, T value)
  {
      const char *raw_bytes = reinterpret_cast<const char *>(&value);
      stream.write(raw_bytes, sizeof(T));
  }
  57. xlnt::detail::zheader read_header(std::istream &istream, const bool global)
  58. {
  59. xlnt::detail::zheader header;
  60. auto sig = read_int<std::uint32_t>(istream);
  61. // read and check for local/global magic
  62. if (global)
  63. {
  64. if (sig != 0x02014b50)
  65. {
  66. throw xlnt::exception("missing global header signature");
  67. }
  68. header.version = read_int<std::uint16_t>(istream);
  69. }
  70. else if (sig != 0x04034b50)
  71. {
  72. throw xlnt::exception("missing local header signature");
  73. }
  74. // Read rest of header
  75. header.version = read_int<std::uint16_t>(istream);
  76. header.flags = read_int<std::uint16_t>(istream);
  77. header.compression_type = read_int<std::uint16_t>(istream);
  78. header.stamp_date = read_int<std::uint16_t>(istream);
  79. header.stamp_time = read_int<std::uint16_t>(istream);
  80. header.crc = read_int<std::uint32_t>(istream);
  81. header.compressed_size = read_int<std::uint32_t>(istream);
  82. header.uncompressed_size = read_int<std::uint32_t>(istream);
  83. auto filename_length = read_int<std::uint16_t>(istream);
  84. auto extra_length = read_int<std::uint16_t>(istream);
  85. std::uint16_t comment_length = 0;
  86. if (global)
  87. {
  88. comment_length = read_int<std::uint16_t>(istream);
  89. /*std::uint16_t disk_number_start = */ read_int<std::uint16_t>(istream);
  90. /*std::uint16_t int_file_attrib = */ read_int<std::uint16_t>(istream);
  91. /*std::uint32_t ext_file_attrib = */ read_int<std::uint32_t>(istream);
  92. header.header_offset = read_int<std::uint32_t>(istream);
  93. }
  94. header.filename.resize(filename_length, '\0');
  95. istream.read(&header.filename[0], filename_length);
  96. header.extra.resize(extra_length, 0);
  97. istream.read(reinterpret_cast<char *>(header.extra.data()), extra_length);
  98. if (global)
  99. {
  100. header.comment.resize(comment_length, '\0');
  101. istream.read(&header.comment[0], comment_length);
  102. }
  103. return header;
  104. }
  105. void write_header(const xlnt::detail::zheader &header, std::ostream &ostream, const bool global)
  106. {
  107. if (global)
  108. {
  109. write_int(ostream, static_cast<std::uint32_t>(0x02014b50)); // header sig
  110. write_int(ostream, static_cast<std::uint16_t>(20)); // version made by
  111. }
  112. else
  113. {
  114. write_int(ostream, static_cast<std::uint32_t>(0x04034b50));
  115. }
  116. write_int(ostream, header.version);
  117. write_int(ostream, header.flags);
  118. write_int(ostream, header.compression_type);
  119. write_int(ostream, header.stamp_date);
  120. write_int(ostream, header.stamp_time);
  121. write_int(ostream, header.crc);
  122. write_int(ostream, header.compressed_size);
  123. write_int(ostream, header.uncompressed_size);
  124. write_int(ostream, static_cast<std::uint16_t>(header.filename.length()));
  125. write_int(ostream, static_cast<std::uint16_t>(0)); // extra lengthx
  126. if (global)
  127. {
  128. write_int(ostream, static_cast<std::uint16_t>(0)); // filecomment
  129. write_int(ostream, static_cast<std::uint16_t>(0)); // disk# start
  130. write_int(ostream, static_cast<std::uint16_t>(0)); // internal file
  131. write_int(ostream, static_cast<std::uint32_t>(0)); // ext final
  132. write_int(ostream, static_cast<std::uint32_t>(header.header_offset)); // rel offset
  133. }
  134. for (auto c : header.filename)
  135. {
  136. write_int(ostream, c);
  137. }
  138. }
  139. } // namespace
  140. namespace xlnt {
  141. namespace detail {
  /// Size in bytes of each fixed I/O buffer used by the stream buffers below.
  static constexpr std::size_t buffer_size = 512;
  143. class zip_streambuf_decompress : public std::streambuf
  144. {
  145. std::istream &istream;
  146. z_stream strm;
  147. std::array<char, buffer_size> in;
  148. std::array<char, buffer_size> out;
  149. zheader header;
  150. std::size_t total_read;
  151. std::size_t total_uncompressed;
  152. bool valid;
  153. bool compressed_data;
  154. static const unsigned short DEFLATE = 8;
  155. static const unsigned short UNCOMPRESSED = 0;
  156. public:
  157. zip_streambuf_decompress(std::istream &stream, zheader central_header)
  158. : istream(stream), header(central_header), total_read(0), total_uncompressed(0), valid(true)
  159. {
  160. in.fill(0);
  161. out.fill(0);
  162. strm.zalloc = nullptr;
  163. strm.zfree = nullptr;
  164. strm.opaque = nullptr;
  165. strm.avail_in = 0;
  166. strm.next_in = nullptr;
  167. setg(in.data(), in.data(), in.data());
  168. setp(nullptr, nullptr);
  169. // skip the header
  170. read_header(istream, false);
  171. if (header.compression_type == DEFLATE)
  172. {
  173. compressed_data = true;
  174. }
  175. else if (header.compression_type == UNCOMPRESSED)
  176. {
  177. compressed_data = false;
  178. }
  179. else
  180. {
  181. compressed_data = false;
  182. throw xlnt::exception("unsupported compression type, should be DEFLATE or uncompressed");
  183. }
  184. // initialize the inflate
  185. if (compressed_data && valid)
  186. {
  187. #pragma clang diagnostic push
  188. #pragma clang diagnostic ignored "-Wold-style-cast"
  189. int result = inflateInit2(&strm, -MAX_WBITS);
  190. #pragma clang diagnostic pop
  191. if (result != Z_OK)
  192. {
  193. throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
  194. }
  195. }
  196. header = central_header;
  197. }
  198. ~zip_streambuf_decompress() override
  199. {
  200. if (compressed_data && valid)
  201. {
  202. inflateEnd(&strm);
  203. }
  204. }
  205. int process()
  206. {
  207. if (!valid) return -1;
  208. if (compressed_data)
  209. {
  210. strm.avail_out = buffer_size - 4;
  211. strm.next_out = reinterpret_cast<Bytef *>(out.data() + 4);
  212. while (strm.avail_out != 0)
  213. {
  214. if (strm.avail_in == 0)
  215. {
  216. // buffer empty, read some more from file
  217. istream.read(in.data(),
  218. static_cast<std::streamsize>(std::min(buffer_size, header.compressed_size - total_read)));
  219. strm.avail_in = static_cast<unsigned int>(istream.gcount());
  220. total_read += strm.avail_in;
  221. strm.next_in = reinterpret_cast<Bytef *>(in.data());
  222. }
  223. const auto ret = inflate(&strm, Z_NO_FLUSH); // decompress
  224. if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
  225. {
  226. throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
  227. }
  228. if (ret == Z_STREAM_END) break;
  229. }
  230. auto unzip_count = buffer_size - strm.avail_out - 4;
  231. total_uncompressed += unzip_count;
  232. return static_cast<int>(unzip_count);
  233. }
  234. // uncompressed, so just read
  235. istream.read(out.data() + 4,
  236. static_cast<std::streamsize>(std::min(buffer_size - 4, header.uncompressed_size - total_read)));
  237. auto count = istream.gcount();
  238. total_read += static_cast<std::size_t>(count);
  239. return static_cast<int>(count);
  240. }
  241. virtual int underflow() override
  242. {
  243. if (gptr() && (gptr() < egptr()))
  244. return traits_type::to_int_type(*gptr()); // if we already have data just use it
  245. auto put_back_count = gptr() - eback();
  246. if (put_back_count > 4) put_back_count = 4;
  247. std::memmove(
  248. out.data() + (4 - put_back_count), gptr() - put_back_count, static_cast<std::size_t>(put_back_count));
  249. int num = process();
  250. setg(out.data() + 4 - put_back_count, out.data() + 4, out.data() + 4 + num);
  251. if (num <= 0) return EOF;
  252. return traits_type::to_int_type(*gptr());
  253. }
  254. virtual int overflow(int c = EOF) override;
  255. };
  256. int zip_streambuf_decompress::overflow(int)
  257. {
  258. throw xlnt::exception("writing to read-only buffer");
  259. }
  260. class zip_streambuf_compress : public std::streambuf
  261. {
  262. std::ostream &ostream; // owned when header==0 (when not part of zip file)
  263. z_stream strm;
  264. std::array<char, buffer_size> in;
  265. std::array<char, buffer_size> out;
  266. zheader *header;
  267. std::uint32_t uncompressed_size;
  268. std::uint32_t crc;
  269. bool valid;
  270. public:
  271. zip_streambuf_compress(zheader *central_header, std::ostream &stream)
  272. : ostream(stream), header(central_header), valid(true)
  273. {
  274. strm.zalloc = nullptr;
  275. strm.zfree = nullptr;
  276. strm.opaque = nullptr;
  277. #pragma clang diagnostic push
  278. #pragma clang diagnostic ignored "-Wold-style-cast"
  279. int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY);
  280. #pragma clang diagnostic pop
  281. if (ret != Z_OK)
  282. {
  283. std::cerr << "libz: failed to deflateInit" << std::endl;
  284. valid = false;
  285. return;
  286. }
  287. setg(nullptr, nullptr, nullptr);
  288. setp(in.data(), in.data() + buffer_size - 4); // we want to be 4 aligned
  289. // Write appropriate header
  290. if (header)
  291. {
  292. header->header_offset = static_cast<std::uint32_t>(stream.tellp());
  293. write_header(*header, ostream, false);
  294. }
  295. uncompressed_size = crc = 0;
  296. }
  297. virtual ~zip_streambuf_compress() override
  298. {
  299. if (valid)
  300. {
  301. process(true);
  302. deflateEnd(&strm);
  303. if (header)
  304. {
  305. auto final_position = ostream.tellp();
  306. header->uncompressed_size = uncompressed_size;
  307. header->crc = crc;
  308. ostream.seekp(header->header_offset);
  309. write_header(*header, ostream, false);
  310. ostream.seekp(final_position);
  311. }
  312. else
  313. {
  314. write_int(ostream, crc);
  315. write_int(ostream, uncompressed_size);
  316. }
  317. }
  318. if (!header) delete &ostream;
  319. }
  320. protected:
  321. int process(bool flush)
  322. {
  323. if (!valid) return -1;
  324. strm.next_in = reinterpret_cast<Bytef *>(pbase());
  325. strm.avail_in = static_cast<unsigned int>(pptr() - pbase());
  326. while (strm.avail_in != 0 || flush)
  327. {
  328. strm.avail_out = buffer_size;
  329. strm.next_out = reinterpret_cast<Bytef *>(out.data());
  330. int ret = deflate(&strm, flush ? Z_FINISH : Z_NO_FLUSH);
  331. if (!(ret != Z_BUF_ERROR && ret != Z_STREAM_ERROR))
  332. {
  333. valid = false;
  334. std::cerr << "gzip: gzip error " << strm.msg << std::endl;
  335. return -1;
  336. }
  337. auto generated_output = static_cast<int>(strm.next_out - reinterpret_cast<std::uint8_t *>(out.data()));
  338. ostream.write(out.data(), generated_output);
  339. if (header) header->compressed_size += static_cast<std::uint32_t>(generated_output);
  340. if (ret == Z_STREAM_END) break;
  341. }
  342. // update counts, crc's and buffers
  343. auto consumed_input = static_cast<std::uint32_t>(pptr() - pbase());
  344. uncompressed_size += consumed_input;
  345. crc = static_cast<std::uint32_t>(crc32(crc, reinterpret_cast<Bytef *>(in.data()), consumed_input));
  346. setp(pbase(), pbase() + buffer_size - 4);
  347. return 1;
  348. }
  349. virtual int sync() override
  350. {
  351. if (pptr() && pptr() > pbase()) return process(false);
  352. return 0;
  353. }
  354. virtual int underflow() override
  355. {
  356. throw xlnt::exception("Attempt to read write only ostream");
  357. }
  358. virtual int overflow(int c = EOF) override;
  359. };
  360. int zip_streambuf_compress::overflow(int c)
  361. {
  362. if (c != EOF)
  363. {
  364. *pptr() = static_cast<char>(c);
  365. pbump(1);
  366. }
  367. if (process(false) == EOF) return EOF;
  368. return c;
  369. }
  370. ozstream::ozstream(std::ostream &stream)
  371. : destination_stream_(stream)
  372. {
  373. if (!destination_stream_)
  374. {
  375. throw xlnt::exception("bad zip stream");
  376. }
  377. }
  378. ozstream::~ozstream()
  379. {
  380. // Write all file headers
  381. auto final_position = destination_stream_.tellp();
  382. for (const auto &header : file_headers_)
  383. {
  384. write_header(header, destination_stream_, true);
  385. }
  386. auto central_end = destination_stream_.tellp();
  387. // Write end of central
  388. write_int(destination_stream_, static_cast<std::uint32_t>(0x06054b50)); // end of central
  389. write_int(destination_stream_, static_cast<std::uint16_t>(0)); // this disk number
  390. write_int(destination_stream_, static_cast<std::uint16_t>(0)); // this disk number
  391. write_int(destination_stream_, static_cast<std::uint16_t>(file_headers_.size())); // one entry in center in this disk
  392. write_int(destination_stream_, static_cast<std::uint16_t>(file_headers_.size())); // one entry in center
  393. write_int(destination_stream_, static_cast<std::uint32_t>(central_end - final_position)); // size of header
  394. write_int(destination_stream_, static_cast<std::uint32_t>(final_position)); // offset to header
  395. write_int(destination_stream_, static_cast<std::uint16_t>(0)); // zip comment
  396. }
  397. std::unique_ptr<std::streambuf> ozstream::open(const path &filename)
  398. {
  399. zheader header;
  400. header.filename = filename.string();
  401. file_headers_.push_back(header);
  402. auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
  403. return std::unique_ptr<zip_streambuf_compress>(buffer);
  404. }
  405. izstream::izstream(std::istream &stream)
  406. : source_stream_(stream)
  407. {
  408. if (!stream)
  409. {
  410. throw xlnt::exception("Invalid file handle");
  411. }
  412. read_central_header();
  413. }
  414. izstream::~izstream()
  415. {
  416. }
  417. bool izstream::read_central_header()
  418. {
  419. // Find the header
  420. // NOTE: this assumes the zip file header is the last thing written to file...
  421. source_stream_.seekg(0, std::ios_base::end);
  422. auto end_position = source_stream_.tellg();
  423. auto max_comment_size = std::uint32_t(0xffff); // max size of header
  424. auto read_size_before_comment = std::uint32_t(22);
  425. std::streamoff read_start = max_comment_size + read_size_before_comment;
  426. if (read_start > end_position)
  427. {
  428. read_start = end_position;
  429. }
  430. source_stream_.seekg(end_position - read_start);
  431. std::vector<std::uint8_t> buf(static_cast<std::size_t>(read_start), '\0');
  432. if (read_start <= 0)
  433. {
  434. throw xlnt::exception("file is empty");
  435. }
  436. source_stream_.read(reinterpret_cast<char *>(buf.data()), read_start);
  437. if (buf[0] == 0xd0 && buf[1] == 0xcf && buf[2] == 0x11 && buf[3] == 0xe0
  438. && buf[4] == 0xa1 && buf[5] == 0xb1 && buf[6] == 0x1a && buf[7] == 0xe1)
  439. {
  440. throw xlnt::exception("encrypted xlsx, password required");
  441. }
  442. auto found_header = false;
  443. std::streamoff header_index = 0;
  444. for (std::streamoff i = 0; i < read_start - 3; ++i)
  445. {
  446. if (buf[static_cast<std::size_t>(i)] == 0x50
  447. && buf[static_cast<std::size_t>(i) + 1] == 0x4b
  448. && buf[static_cast<std::size_t>(i) + 2] == 0x05
  449. && buf[static_cast<std::size_t>(i) + 3] == 0x06)
  450. {
  451. found_header = true;
  452. header_index = i;
  453. break;
  454. }
  455. }
  456. if (!found_header)
  457. {
  458. throw xlnt::exception("failed to find zip header");
  459. }
  460. // seek to end of central header and read
  461. source_stream_.seekg(end_position - (read_start - header_index));
  462. /*auto word = */ read_int<std::uint32_t>(source_stream_);
  463. auto disk_number1 = read_int<std::uint16_t>(source_stream_);
  464. auto disk_number2 = read_int<std::uint16_t>(source_stream_);
  465. if (disk_number1 != disk_number2 || disk_number1 != 0)
  466. {
  467. throw xlnt::exception("multiple disk zip files are not supported");
  468. }
  469. auto num_files = read_int<std::uint16_t>(source_stream_); // one entry in center in this disk
  470. auto num_files_this_disk = read_int<std::uint16_t>(source_stream_); // one entry in center
  471. if (num_files != num_files_this_disk)
  472. {
  473. throw xlnt::exception("multi disk zip files are not supported");
  474. }
  475. /*auto size_of_header = */ read_int<std::uint32_t>(source_stream_); // size of header
  476. auto header_offset = read_int<std::uint32_t>(source_stream_); // offset to header
  477. // go to header and read all file headers
  478. source_stream_.seekg(header_offset);
  479. for (std::uint16_t i = 0; i < num_files; ++i)
  480. {
  481. auto header = read_header(source_stream_, true);
  482. file_headers_[header.filename] = header;
  483. }
  484. return true;
  485. }
  486. std::unique_ptr<std::streambuf> izstream::open(const path &filename) const
  487. {
  488. if (!has_file(filename))
  489. {
  490. throw xlnt::exception("file not found");
  491. }
  492. auto header = file_headers_.at(filename.string());
  493. source_stream_.seekg(header.header_offset);
  494. auto buffer = new zip_streambuf_decompress(source_stream_, header);
  495. return std::unique_ptr<zip_streambuf_decompress>(buffer);
  496. }
  497. std::string izstream::read(const path &filename) const
  498. {
  499. auto buffer = open(filename);
  500. std::istream stream(buffer.get());
  501. auto bytes = to_vector(stream);
  502. return std::string(bytes.begin(), bytes.end());
  503. }
  504. std::vector<path> izstream::files() const
  505. {
  506. std::vector<path> filenames;
  507. std::transform(file_headers_.begin(), file_headers_.end(), std::back_inserter(filenames),
  508. [](const std::pair<std::string, zheader> &h) { return path(h.first); });
  509. return filenames;
  510. }
  511. bool izstream::has_file(const path &filename) const
  512. {
  513. return file_headers_.count(filename.string()) != 0;
  514. }
  515. } // namespace detail
  516. } // namespace xlnt