123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631 |
- /*
- PARTIO SOFTWARE
- Copyright 2010 Disney Enterprises, Inc. All rights reserved
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
- Studios" or the names of its contributors may NOT be used to
- endorse or promote products derived from this software without
- specific prior written permission from Walt Disney Pictures.
- Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
- CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
- BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
- FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
- IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
- */
- #include <algorithm>
- #include <array>
- #include <cassert>
- #include <cstring>
- #include <fstream>
- #include <iomanip>
- #include <iostream>
- #include <iterator> // for std::back_inserter
- #include <stdexcept>
- #include <string>
- #include <miniz.h>
- #include <xlnt/utils/exceptions.hpp>
- #include <detail/serialization/vector_streambuf.hpp>
- #include <detail/serialization/zstream.hpp>
- namespace {
// Reads sizeof(T) raw bytes from `stream` and returns them reinterpreted as a T.
//
// The bytes are copied verbatim, i.e. the value is interpreted in the host's
// byte order. No error is reported from here: if the stream cannot supply
// sizeof(T) bytes the stream's own state flags are set by read() and the
// bytes that were not read stay zero (the result is value-initialized so a
// failed read never yields an indeterminate value).
template <class T>
T read_int(std::istream &stream)
{
    T value{};
    stream.read(reinterpret_cast<char *>(&value), static_cast<std::streamsize>(sizeof(T)));

    return value;
}
// Writes the sizeof(T) raw bytes of `value` to `stream`, exactly as they are
// laid out in memory (host byte order). Stream errors are not checked here.
template <class T>
void write_int(std::ostream &stream, T value)
{
    const char *raw_bytes = reinterpret_cast<const char *>(&value);
    stream.write(raw_bytes, sizeof(T));
}
- xlnt::detail::zheader read_header(std::istream &istream, const bool global)
- {
- xlnt::detail::zheader header;
- auto sig = read_int<std::uint32_t>(istream);
- // read and check for local/global magic
- if (global)
- {
- if (sig != 0x02014b50)
- {
- throw xlnt::exception("missing global header signature");
- }
- header.version = read_int<std::uint16_t>(istream);
- }
- else if (sig != 0x04034b50)
- {
- throw xlnt::exception("missing local header signature");
- }
- // Read rest of header
- header.version = read_int<std::uint16_t>(istream);
- header.flags = read_int<std::uint16_t>(istream);
- header.compression_type = read_int<std::uint16_t>(istream);
- header.stamp_date = read_int<std::uint16_t>(istream);
- header.stamp_time = read_int<std::uint16_t>(istream);
- header.crc = read_int<std::uint32_t>(istream);
- header.compressed_size = read_int<std::uint32_t>(istream);
- header.uncompressed_size = read_int<std::uint32_t>(istream);
- auto filename_length = read_int<std::uint16_t>(istream);
- auto extra_length = read_int<std::uint16_t>(istream);
- std::uint16_t comment_length = 0;
- if (global)
- {
- comment_length = read_int<std::uint16_t>(istream);
- /*std::uint16_t disk_number_start = */ read_int<std::uint16_t>(istream);
- /*std::uint16_t int_file_attrib = */ read_int<std::uint16_t>(istream);
- /*std::uint32_t ext_file_attrib = */ read_int<std::uint32_t>(istream);
- header.header_offset = read_int<std::uint32_t>(istream);
- }
- header.filename.resize(filename_length, '\0');
- istream.read(&header.filename[0], filename_length);
- header.extra.resize(extra_length, 0);
- istream.read(reinterpret_cast<char *>(header.extra.data()), extra_length);
- if (global)
- {
- header.comment.resize(comment_length, '\0');
- istream.read(&header.comment[0], comment_length);
- }
- return header;
- }
- void write_header(const xlnt::detail::zheader &header, std::ostream &ostream, const bool global)
- {
- if (global)
- {
- write_int(ostream, static_cast<std::uint32_t>(0x02014b50)); // header sig
- write_int(ostream, static_cast<std::uint16_t>(20)); // version made by
- }
- else
- {
- write_int(ostream, static_cast<std::uint32_t>(0x04034b50));
- }
- write_int(ostream, header.version);
- write_int(ostream, header.flags);
- write_int(ostream, header.compression_type);
- write_int(ostream, header.stamp_date);
- write_int(ostream, header.stamp_time);
- write_int(ostream, header.crc);
- write_int(ostream, header.compressed_size);
- write_int(ostream, header.uncompressed_size);
- write_int(ostream, static_cast<std::uint16_t>(header.filename.length()));
- write_int(ostream, static_cast<std::uint16_t>(0)); // extra lengthx
- if (global)
- {
- write_int(ostream, static_cast<std::uint16_t>(0)); // filecomment
- write_int(ostream, static_cast<std::uint16_t>(0)); // disk# start
- write_int(ostream, static_cast<std::uint16_t>(0)); // internal file
- write_int(ostream, static_cast<std::uint32_t>(0)); // ext final
- write_int(ostream, static_cast<std::uint32_t>(header.header_offset)); // rel offset
- }
- for (auto c : header.filename)
- {
- write_int(ostream, c);
- }
- }
- } // namespace
- namespace xlnt {
- namespace detail {
// Size in bytes of each staging buffer (`in` and `out`) used by the zip streambufs below.
static constexpr std::size_t buffer_size = 512;
- class zip_streambuf_decompress : public std::streambuf
- {
- std::istream &istream;
- z_stream strm;
- std::array<char, buffer_size> in;
- std::array<char, buffer_size> out;
- zheader header;
- std::size_t total_read;
- std::size_t total_uncompressed;
- bool valid;
- bool compressed_data;
- static const unsigned short DEFLATE = 8;
- static const unsigned short UNCOMPRESSED = 0;
- public:
- zip_streambuf_decompress(std::istream &stream, zheader central_header)
- : istream(stream), header(central_header), total_read(0), total_uncompressed(0), valid(true)
- {
- in.fill(0);
- out.fill(0);
- strm.zalloc = nullptr;
- strm.zfree = nullptr;
- strm.opaque = nullptr;
- strm.avail_in = 0;
- strm.next_in = nullptr;
- setg(in.data(), in.data(), in.data());
- setp(nullptr, nullptr);
- // skip the header
- read_header(istream, false);
- if (header.compression_type == DEFLATE)
- {
- compressed_data = true;
- }
- else if (header.compression_type == UNCOMPRESSED)
- {
- compressed_data = false;
- }
- else
- {
- compressed_data = false;
- throw xlnt::exception("unsupported compression type, should be DEFLATE or uncompressed");
- }
- // initialize the inflate
- if (compressed_data && valid)
- {
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wold-style-cast"
- int result = inflateInit2(&strm, -MAX_WBITS);
- #pragma clang diagnostic pop
- if (result != Z_OK)
- {
- throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
- }
- }
- header = central_header;
- }
- ~zip_streambuf_decompress() override
- {
- if (compressed_data && valid)
- {
- inflateEnd(&strm);
- }
- }
- int process()
- {
- if (!valid) return -1;
- if (compressed_data)
- {
- strm.avail_out = buffer_size - 4;
- strm.next_out = reinterpret_cast<Bytef *>(out.data() + 4);
- while (strm.avail_out != 0)
- {
- if (strm.avail_in == 0)
- {
- // buffer empty, read some more from file
- istream.read(in.data(),
- static_cast<std::streamsize>(std::min(buffer_size, header.compressed_size - total_read)));
- strm.avail_in = static_cast<unsigned int>(istream.gcount());
- total_read += strm.avail_in;
- strm.next_in = reinterpret_cast<Bytef *>(in.data());
- }
- const auto ret = inflate(&strm, Z_NO_FLUSH); // decompress
- if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
- {
- throw xlnt::exception("couldn't inflate ZIP, possibly corrupted");
- }
- if (ret == Z_STREAM_END) break;
- }
- auto unzip_count = buffer_size - strm.avail_out - 4;
- total_uncompressed += unzip_count;
- return static_cast<int>(unzip_count);
- }
- // uncompressed, so just read
- istream.read(out.data() + 4,
- static_cast<std::streamsize>(std::min(buffer_size - 4, header.uncompressed_size - total_read)));
- auto count = istream.gcount();
- total_read += static_cast<std::size_t>(count);
- return static_cast<int>(count);
- }
- virtual int underflow() override
- {
- if (gptr() && (gptr() < egptr()))
- return traits_type::to_int_type(*gptr()); // if we already have data just use it
- auto put_back_count = gptr() - eback();
- if (put_back_count > 4) put_back_count = 4;
- std::memmove(
- out.data() + (4 - put_back_count), gptr() - put_back_count, static_cast<std::size_t>(put_back_count));
- int num = process();
- setg(out.data() + 4 - put_back_count, out.data() + 4, out.data() + 4 + num);
- if (num <= 0) return EOF;
- return traits_type::to_int_type(*gptr());
- }
- virtual int overflow(int c = EOF) override;
- };
- int zip_streambuf_decompress::overflow(int)
- {
- throw xlnt::exception("writing to read-only buffer");
- }
- class zip_streambuf_compress : public std::streambuf
- {
- std::ostream &ostream; // owned when header==0 (when not part of zip file)
- z_stream strm;
- std::array<char, buffer_size> in;
- std::array<char, buffer_size> out;
- zheader *header;
- std::uint32_t uncompressed_size;
- std::uint32_t crc;
- bool valid;
- public:
- zip_streambuf_compress(zheader *central_header, std::ostream &stream)
- : ostream(stream), header(central_header), valid(true)
- {
- strm.zalloc = nullptr;
- strm.zfree = nullptr;
- strm.opaque = nullptr;
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wold-style-cast"
- int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY);
- #pragma clang diagnostic pop
- if (ret != Z_OK)
- {
- std::cerr << "libz: failed to deflateInit" << std::endl;
- valid = false;
- return;
- }
- setg(nullptr, nullptr, nullptr);
- setp(in.data(), in.data() + buffer_size - 4); // we want to be 4 aligned
- // Write appropriate header
- if (header)
- {
- header->header_offset = static_cast<std::uint32_t>(stream.tellp());
- write_header(*header, ostream, false);
- }
- uncompressed_size = crc = 0;
- }
- virtual ~zip_streambuf_compress() override
- {
- if (valid)
- {
- process(true);
- deflateEnd(&strm);
- if (header)
- {
- auto final_position = ostream.tellp();
- header->uncompressed_size = uncompressed_size;
- header->crc = crc;
- ostream.seekp(header->header_offset);
- write_header(*header, ostream, false);
- ostream.seekp(final_position);
- }
- else
- {
- write_int(ostream, crc);
- write_int(ostream, uncompressed_size);
- }
- }
- if (!header) delete &ostream;
- }
- protected:
- int process(bool flush)
- {
- if (!valid) return -1;
- strm.next_in = reinterpret_cast<Bytef *>(pbase());
- strm.avail_in = static_cast<unsigned int>(pptr() - pbase());
- while (strm.avail_in != 0 || flush)
- {
- strm.avail_out = buffer_size;
- strm.next_out = reinterpret_cast<Bytef *>(out.data());
- int ret = deflate(&strm, flush ? Z_FINISH : Z_NO_FLUSH);
- if (!(ret != Z_BUF_ERROR && ret != Z_STREAM_ERROR))
- {
- valid = false;
- std::cerr << "gzip: gzip error " << strm.msg << std::endl;
- return -1;
- }
- auto generated_output = static_cast<int>(strm.next_out - reinterpret_cast<std::uint8_t *>(out.data()));
- ostream.write(out.data(), generated_output);
- if (header) header->compressed_size += static_cast<std::uint32_t>(generated_output);
- if (ret == Z_STREAM_END) break;
- }
- // update counts, crc's and buffers
- auto consumed_input = static_cast<std::uint32_t>(pptr() - pbase());
- uncompressed_size += consumed_input;
- crc = static_cast<std::uint32_t>(crc32(crc, reinterpret_cast<Bytef *>(in.data()), consumed_input));
- setp(pbase(), pbase() + buffer_size - 4);
- return 1;
- }
- virtual int sync() override
- {
- if (pptr() && pptr() > pbase()) return process(false);
- return 0;
- }
- virtual int underflow() override
- {
- throw xlnt::exception("Attempt to read write only ostream");
- }
- virtual int overflow(int c = EOF) override;
- };
- int zip_streambuf_compress::overflow(int c)
- {
- if (c != EOF)
- {
- *pptr() = static_cast<char>(c);
- pbump(1);
- }
- if (process(false) == EOF) return EOF;
- return c;
- }
- ozstream::ozstream(std::ostream &stream)
- : destination_stream_(stream)
- {
- if (!destination_stream_)
- {
- throw xlnt::exception("bad zip stream");
- }
- }
- ozstream::~ozstream()
- {
- // Write all file headers
- auto final_position = destination_stream_.tellp();
- for (const auto &header : file_headers_)
- {
- write_header(header, destination_stream_, true);
- }
- auto central_end = destination_stream_.tellp();
- // Write end of central
- write_int(destination_stream_, static_cast<std::uint32_t>(0x06054b50)); // end of central
- write_int(destination_stream_, static_cast<std::uint16_t>(0)); // this disk number
- write_int(destination_stream_, static_cast<std::uint16_t>(0)); // this disk number
- write_int(destination_stream_, static_cast<std::uint16_t>(file_headers_.size())); // one entry in center in this disk
- write_int(destination_stream_, static_cast<std::uint16_t>(file_headers_.size())); // one entry in center
- write_int(destination_stream_, static_cast<std::uint32_t>(central_end - final_position)); // size of header
- write_int(destination_stream_, static_cast<std::uint32_t>(final_position)); // offset to header
- write_int(destination_stream_, static_cast<std::uint16_t>(0)); // zip comment
- }
- std::unique_ptr<std::streambuf> ozstream::open(const path &filename)
- {
- zheader header;
- header.filename = filename.string();
- file_headers_.push_back(header);
- auto buffer = new zip_streambuf_compress(&file_headers_.back(), destination_stream_);
- return std::unique_ptr<zip_streambuf_compress>(buffer);
- }
- izstream::izstream(std::istream &stream)
- : source_stream_(stream)
- {
- if (!stream)
- {
- throw xlnt::exception("Invalid file handle");
- }
- read_central_header();
- }
- izstream::~izstream()
- {
- }
- bool izstream::read_central_header()
- {
- // Find the header
- // NOTE: this assumes the zip file header is the last thing written to file...
- source_stream_.seekg(0, std::ios_base::end);
- auto end_position = source_stream_.tellg();
- auto max_comment_size = std::uint32_t(0xffff); // max size of header
- auto read_size_before_comment = std::uint32_t(22);
- std::streamoff read_start = max_comment_size + read_size_before_comment;
- if (read_start > end_position)
- {
- read_start = end_position;
- }
- source_stream_.seekg(end_position - read_start);
- std::vector<std::uint8_t> buf(static_cast<std::size_t>(read_start), '\0');
- if (read_start <= 0)
- {
- throw xlnt::exception("file is empty");
- }
- source_stream_.read(reinterpret_cast<char *>(buf.data()), read_start);
- if (buf[0] == 0xd0 && buf[1] == 0xcf && buf[2] == 0x11 && buf[3] == 0xe0
- && buf[4] == 0xa1 && buf[5] == 0xb1 && buf[6] == 0x1a && buf[7] == 0xe1)
- {
- throw xlnt::exception("encrypted xlsx, password required");
- }
- auto found_header = false;
- std::streamoff header_index = 0;
- for (std::streamoff i = 0; i < read_start - 3; ++i)
- {
- if (buf[static_cast<std::size_t>(i)] == 0x50
- && buf[static_cast<std::size_t>(i) + 1] == 0x4b
- && buf[static_cast<std::size_t>(i) + 2] == 0x05
- && buf[static_cast<std::size_t>(i) + 3] == 0x06)
- {
- found_header = true;
- header_index = i;
- break;
- }
- }
- if (!found_header)
- {
- throw xlnt::exception("failed to find zip header");
- }
- // seek to end of central header and read
- source_stream_.seekg(end_position - (read_start - header_index));
- /*auto word = */ read_int<std::uint32_t>(source_stream_);
- auto disk_number1 = read_int<std::uint16_t>(source_stream_);
- auto disk_number2 = read_int<std::uint16_t>(source_stream_);
- if (disk_number1 != disk_number2 || disk_number1 != 0)
- {
- throw xlnt::exception("multiple disk zip files are not supported");
- }
- auto num_files = read_int<std::uint16_t>(source_stream_); // one entry in center in this disk
- auto num_files_this_disk = read_int<std::uint16_t>(source_stream_); // one entry in center
- if (num_files != num_files_this_disk)
- {
- throw xlnt::exception("multi disk zip files are not supported");
- }
- /*auto size_of_header = */ read_int<std::uint32_t>(source_stream_); // size of header
- auto header_offset = read_int<std::uint32_t>(source_stream_); // offset to header
- // go to header and read all file headers
- source_stream_.seekg(header_offset);
- for (std::uint16_t i = 0; i < num_files; ++i)
- {
- auto header = read_header(source_stream_, true);
- file_headers_[header.filename] = header;
- }
- return true;
- }
- std::unique_ptr<std::streambuf> izstream::open(const path &filename) const
- {
- if (!has_file(filename))
- {
- throw xlnt::exception("file not found");
- }
- auto header = file_headers_.at(filename.string());
- source_stream_.seekg(header.header_offset);
- auto buffer = new zip_streambuf_decompress(source_stream_, header);
- return std::unique_ptr<zip_streambuf_decompress>(buffer);
- }
- std::string izstream::read(const path &filename) const
- {
- auto buffer = open(filename);
- std::istream stream(buffer.get());
- auto bytes = to_vector(stream);
- return std::string(bytes.begin(), bytes.end());
- }
- std::vector<path> izstream::files() const
- {
- std::vector<path> filenames;
- std::transform(file_headers_.begin(), file_headers_.end(), std::back_inserter(filenames),
- [](const std::pair<std::string, zheader> &h) { return path(h.first); });
- return filenames;
- }
- bool izstream::has_file(const path &filename) const
- {
- return file_headers_.count(filename.string()) != 0;
- }
- } // namespace detail
- } // namespace xlnt
|