// ZipInputStream.cs
//
// ------------------------------------------------------------------
//
// Copyright (c) 2009-2010 Dino Chiesa.
// All rights reserved.
//
// This code module is part of DotNetZip, a zipfile class library.
//
// ------------------------------------------------------------------
//
// This code is licensed under the Microsoft Public License.
// See the file License.txt for the license details.
// More info on: http://dotnetzip.codeplex.com
//
// ------------------------------------------------------------------
//
// last saved (in emacs):
// Time-stamp: <2011-July-31 14:48:30>
//
// ------------------------------------------------------------------
//
// This module defines the ZipInputStream class, which is a stream metaphor for
// reading zip files.  This class does not depend on Ionic.Zip.ZipFile, but rather
// stands alongside it as an alternative "container" for ZipEntry, when reading zips.
//
// It adds one interesting method to the normal "stream" interface: GetNextEntry.
//
// ------------------------------------------------------------------
//

using System;
using System.Threading;
using System.Collections.Generic;
using System.IO;
using Ionic.Zip;
using OfficeOpenXml.Packaging.Ionic.Zip;
using OfficeOpenXml.Packaging.Ionic.Crc;

namespace Ionic.Zip
{
    /// <summary>
    ///   Provides a stream metaphor for reading zip files.
    /// </summary>
    ///
    /// <remarks>
    /// <para>
    ///   This class provides an alternative programming model for reading zip files to
    ///   the one enabled by the <c>ZipFile</c> class.  Use this when reading zip
    ///   files, as an alternative to the <c>ZipFile</c> class, when you would
    ///   like to use a Stream class to read the file.
    /// </para>
    ///
    /// <para>
    ///   Some application designs require a readable stream for input. This stream can
    ///   be used to read a zip file, and extract entries.
    /// </para>
    ///
    /// <para>
    ///   Both the <c>ZipInputStream</c> class and the <c>ZipFile</c> class can be used
    ///   to read and extract zip files.  Both of them support many of the common zip
    ///   features, including Unicode, different compression levels, and ZIP64.  The
    ///   programming models differ. For example, when extracting entries via calls to
    ///   the <c>GetNextEntry()</c> and <c>Read()</c> methods on the
    ///   <c>ZipInputStream</c> class, the caller is responsible for creating the file,
    ///   writing the bytes into the file, setting the attributes on the file, and
    ///   setting the created, last modified, and last accessed timestamps on the
    ///   file. All of these things are done automatically by a call to
    ///   <c>ZipEntry.Extract()</c>.  For this reason, the
    ///   <c>ZipInputStream</c> is generally recommended for when your application wants
    ///   to extract the data, without storing that data into a file.
    /// </para>
    ///
    /// <para>
    ///   Aside from the obvious differences in programming model, there are some
    ///   differences in capability between the <c>ZipFile</c> class and the
    ///   <c>ZipInputStream</c> class.
    /// </para>
    ///
    /// <list type="bullet">
    ///   <item>
    ///     <c>ZipFile</c> can be used to create or update zip files, or read and
    ///     extract zip files. <c>ZipInputStream</c> can be used only to read and
    ///     extract zip files. If you want to use a stream to create zip files, check
    ///     out the <c>ZipOutputStream</c>.
    ///   </item>
    ///
    ///   <item>
    ///     <c>ZipInputStream</c> cannot read segmented or spanned
    ///     zip files.
    ///   </item>
    ///
    ///   <item>
    ///     <c>ZipInputStream</c> will not read Zip file comments.
    ///   </item>
    ///
    ///   <item>
    ///     When reading larger files, <c>ZipInputStream</c> will always underperform
    ///     <c>ZipFile</c>. This is because the <c>ZipInputStream</c> does a full scan on the
    ///     zip file, while the <c>ZipFile</c> class reads the central directory of the
    ///     zip file.
    ///   </item>
    /// </list>
    /// </remarks>
    internal class ZipInputStream : Stream
    {
        /// <summary>
        ///   Create a <c>ZipInputStream</c>, wrapping it around an existing stream.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   While the <c>ZipFile</c> class is generally easier
        ///   to use, this class provides an alternative to those
        ///   applications that want to read from a zipfile directly,
        ///   using a <see cref="System.IO.Stream"/>.
        /// </para>
        ///
        /// <para>
        ///   See the remarks on the <see cref="ZipInputStream"/> class for a
        ///   comparison of the programming models and capabilities of
        ///   <c>ZipInputStream</c> and <c>ZipFile</c>.
        /// </para>
        /// </remarks>
        ///
        /// <param name="stream">
        ///   The stream to read. It must be readable. This stream will be closed at
        ///   the time the <c>ZipInputStream</c> is closed.
        /// </param>
        ///
        /// <example>
        ///
        ///   This example shows how to read a zip file, and extract entries, using the
        ///   <c>ZipInputStream</c> class.
        ///
        /// <code lang="C#">
        /// private void Unzip()
        /// {
        ///     byte[] buffer= new byte[2048];
        ///     int n;
        ///     using (var raw = File.Open(inputFileName, FileMode.Open, FileAccess.Read))
        ///     {
        ///         using (var input= new ZipInputStream(raw))
        ///         {
        ///             ZipEntry e;
        ///             while (( e = input.GetNextEntry()) != null)
        ///             {
        ///                 if (e.IsDirectory) continue;
        ///                 string outputPath = Path.Combine(extractDir, e.FileName);
        ///                 using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
        ///                 {
        ///                     while ((n= input.Read(buffer, 0, buffer.Length)) > 0)
        ///                     {
        ///                         output.Write(buffer,0,n);
        ///                     }
        ///                 }
        ///             }
        ///         }
        ///     }
        /// }
        /// </code>
        ///
        /// <code lang="VB">
        /// Private Sub UnZip()
        ///     Dim inputFileName As String = "MyArchive.zip"
        ///     Dim extractDir As String = "extract"
        ///     Dim buffer As Byte() = New Byte(2047) {}
        ///     Using raw As FileStream = File.Open(inputFileName, FileMode.Open, FileAccess.Read)
        ///         Using input As ZipInputStream = New ZipInputStream(raw)
        ///             Dim e As ZipEntry = input.GetNextEntry()
        ///             Do While (Not e Is Nothing)
        ///                 If Not e.IsDirectory Then
        ///                     Using output As FileStream = File.Open(Path.Combine(extractDir, e.FileName), _
        ///                                                            FileMode.Create, FileAccess.ReadWrite)
        ///                         Dim n As Integer = input.Read(buffer, 0, buffer.Length)
        ///                         Do While (n > 0)
        ///                             output.Write(buffer, 0, n)
        ///                             n = input.Read(buffer, 0, buffer.Length)
        ///                         Loop
        ///                     End Using
        ///                 End If
        ///                 e = input.GetNextEntry()
        ///             Loop
        ///         End Using
        ///     End Using
        /// End Sub
        /// </code>
        /// </example>
        public ZipInputStream(Stream stream)
            : this(stream, false)
        {
        }

        /// <summary>
        ///   Create a <c>ZipInputStream</c>, given the name of an existing zip file.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   This constructor opens a <c>FileStream</c> for the given zipfile, and
        ///   wraps a <c>ZipInputStream</c> around that.  See the documentation for the
        ///   <see cref="ZipInputStream(Stream)"/> constructor for full details.
        /// </para>
        ///
        /// <para>
        ///   While the <c>ZipFile</c> class is generally easier
        ///   to use, this class provides an alternative to those
        ///   applications that want to read from a zipfile directly,
        ///   using a <see cref="System.IO.Stream"/>.
        /// </para>
        ///
        /// <para>
        ///   The file is opened for shared read access and is closed when this
        ///   <c>ZipInputStream</c> is closed.  If initialization fails, the file
        ///   is closed before the exception propagates.
        /// </para>
        /// </remarks>
        ///
        /// <param name="fileName">
        ///   The name of the filesystem file to read.
        /// </param>
        ///
        /// <example>
        ///
        ///   This example shows how to read a zip file, and extract entries, using the
        ///   <c>ZipInputStream</c> class.
        ///
        /// <code lang="C#">
        /// private void Unzip()
        /// {
        ///     byte[] buffer= new byte[2048];
        ///     int n;
        ///     using (var input= new ZipInputStream(inputFileName))
        ///     {
        ///         ZipEntry e;
        ///         while (( e = input.GetNextEntry()) != null)
        ///         {
        ///             if (e.IsDirectory) continue;
        ///             string outputPath = Path.Combine(extractDir, e.FileName);
        ///             using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
        ///             {
        ///                 while ((n= input.Read(buffer, 0, buffer.Length)) > 0)
        ///                 {
        ///                     output.Write(buffer,0,n);
        ///                 }
        ///             }
        ///         }
        ///     }
        /// }
        /// </code>
        ///
        /// <code lang="VB">
        /// Private Sub UnZip()
        ///     Dim inputFileName As String = "MyArchive.zip"
        ///     Dim extractDir As String = "extract"
        ///     Dim buffer As Byte() = New Byte(2047) {}
        ///     Using input As ZipInputStream = New ZipInputStream(inputFileName)
        ///         Dim e As ZipEntry = input.GetNextEntry()
        ///         Do While (Not e Is Nothing)
        ///             If Not e.IsDirectory Then
        ///                 Using output As FileStream = File.Open(Path.Combine(extractDir, e.FileName), _
        ///                                                        FileMode.Create, FileAccess.ReadWrite)
        ///                     Dim n As Integer = input.Read(buffer, 0, buffer.Length)
        ///                     Do While (n > 0)
        ///                         output.Write(buffer, 0, n)
        ///                         n = input.Read(buffer, 0, buffer.Length)
        ///                     Loop
        ///                 End Using
        ///             End If
        ///             e = input.GetNextEntry()
        ///         Loop
        ///     End Using
        /// End Sub
        /// </code>
        /// </example>
        public ZipInputStream(String fileName)
        {
            Stream stream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            try
            {
                _Init(stream, false, fileName);
            }
            catch
            {
                // We opened this file ourselves and the caller never gets a
                // reference to it, so release the handle before propagating.
                CloseStream(stream);
                throw;
            }
        }

        /// <summary>
        ///   Create a <c>ZipInputStream</c>, explicitly specifying whether to
        ///   keep the underlying stream open.
        /// </summary>
        ///
        /// <remarks>
        ///   See the documentation for the <see
        ///   cref="ZipInputStream(Stream)">ZipInputStream(Stream)</see>
        ///   constructor for a discussion of the class, and an example of how to use the class.
        /// </remarks>
        ///
        /// <param name="stream">
        ///   The stream to read from. It must be readable.
        /// </param>
        ///
        /// <param name="leaveOpen">
        ///   true if the application would like the stream
        ///   to remain open after the <c>ZipInputStream</c> has been closed.
        /// </param>
        public ZipInputStream(Stream stream, bool leaveOpen)
        {
            _Init(stream, leaveOpen, null);
        }

        /// <summary>
        ///   Shared constructor logic: validates the stream and sets the initial state.
        /// </summary>
        /// <param name="stream">The stream to read from; must be non-null and readable.</param>
        /// <param name="leaveOpen">Whether the wrapped stream is left open on dispose.</param>
        /// <param name="name">
        ///   A display name used by <see cref="ToString()"/>; "(stream)" is used when null.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
        private void _Init(Stream stream, bool leaveOpen, string name)
        {
            if (stream == null)
                throw new ArgumentNullException("stream");
            if (!stream.CanRead)
                throw new ZipException("The stream must be readable.");

            _inputStream = stream;
            _container = new ZipContainer(this);
            _provisionalAlternateEncoding = System.Text.Encoding.GetEncoding("IBM437");
            _leaveUnderlyingStreamOpen = leaveOpen;
            // The first GetNextEntry() must scan for an entry signature.
            _findRequired = true;
            _name = name ?? "(stream)";
        }

        /// <summary>
        ///   Closes a stream using the API that is available on the target platform.
        /// </summary>
        private static void CloseStream(Stream stream)
        {
#if NETCF
            stream.Close();
#else
            stream.Dispose();
#endif
        }

        /// <summary>
        ///   Throws if this instance has already been closed.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">The stream has been closed.</exception>
        private void ThrowIfClosed()
        {
            if (_closed)
                throw new System.InvalidOperationException("The stream has been closed.");
        }

        /// <summary>Provides a string representation of the instance.</summary>
        /// <remarks>
        ///   <para>
        ///     This can be useful for debugging purposes.
        ///   </para>
        /// </remarks>
        /// <returns>a string representation of the instance.</returns>
        public override String ToString()
        {
            return String.Format("ZipInputStream::{0}(leaveOpen({1}))", _name, _leaveUnderlyingStreamOpen);
        }

        /// <summary>
        ///   The text encoding to use when reading entries into the zip archive, for
        ///   those entries whose filenames or comments cannot be encoded with the
        ///   default (IBM437) encoding.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   In its zip specification, PKWare describes two options for encoding
        ///   filenames and comments: using IBM437 or UTF-8.  But, some archiving tools
        ///   or libraries do not follow the specification, and instead encode
        ///   characters using the system default code page.  For example, WinRAR when
        ///   run on a machine in Shanghai may encode filenames with the Big-5 Chinese
        ///   (950) code page.  This behavior is contrary to the Zip specification, but
        ///   it occurs anyway.
        /// </para>
        ///
        /// <para>
        ///   When using DotNetZip to read zip archives that use something other than
        ///   UTF-8 or IBM437, set this property to specify the code page to use when
        ///   reading encoded filenames and comments for each <c>ZipEntry</c> in the zip
        ///   file.
        /// </para>
        ///
        /// <para>
        ///   This property is "provisional". When the entry in the zip archive is not
        ///   explicitly marked as using UTF-8, then IBM437 is used to decode filenames
        ///   and comments. If a loss of data would result from using IBM437 -
        ///   specifically when encoding and decoding is not reflexive - the codepage
        ///   specified here is used. It is possible, therefore, to have a given entry
        ///   with a <c>Comment</c> encoded in IBM437 and a <c>FileName</c> encoded with
        ///   the specified "provisional" codepage.
        /// </para>
        ///
        /// <para>
        ///   When a zip file uses an arbitrary, non-UTF8 code page for encoding, there
        ///   is no standard way for the reader application - whether DotNetZip, WinZip,
        ///   WinRar, or something else - to know which codepage has been used for the
        ///   entries. Readers of zip files are not able to inspect the zip file and
        ///   determine the codepage that was used for the entries contained within it.
        ///   It is left to the application or user to determine the necessary codepage
        ///   when reading zip files encoded this way.  If you use an incorrect codepage
        ///   when reading a zipfile, you will get entries with filenames that are
        ///   incorrect, and the incorrect filenames may even contain characters that
        ///   are not legal for use within filenames in Windows. Extracting entries with
        ///   illegal characters in the filenames will lead to exceptions. It's too bad,
        ///   but this is just the way things are with code pages in zip files. Caveat
        ///   Emptor.
        /// </para>
        /// </remarks>
        public System.Text.Encoding ProvisionalAlternateEncoding
        {
            get
            {
                return _provisionalAlternateEncoding;
            }
            set
            {
                _provisionalAlternateEncoding = value;
            }
        }

        /// <summary>
        ///   Size of the work buffer to use for the ZLIB codec during decompression.
        /// </summary>
        ///
        /// <remarks>
        ///   Setting this affects the performance and memory efficiency of compression
        ///   and decompression.  For larger files, setting this to a larger size may
        ///   improve performance, but the exact numbers vary depending on available
        ///   memory, and a bunch of other variables. I don't have good firm
        ///   recommendations on how to set it.  You'll have to test it yourself. Or
        ///   just leave it alone and accept the default.
        /// </remarks>
        public int CodecBufferSize
        {
            get;
            set;
        }

        /// <summary>
        ///   Sets the password to be used on the <c>ZipInputStream</c> instance.
        /// </summary>
        ///
        /// <remarks>
        ///
        /// <para>
        ///   When reading a zip archive, this password is used to read and decrypt the
        ///   entries that are encrypted within the zip file. When entries within a zip
        ///   file use different passwords, set the appropriate password for the entry
        ///   before the first call to <c>Read()</c> for each entry.
        /// </para>
        ///
        /// <para>
        ///   When reading an entry that is not encrypted, the value of this property is
        ///   ignored.
        /// </para>
        ///
        /// </remarks>
        ///
        /// <exception cref="System.InvalidOperationException">
        ///   The stream has already been closed.
        /// </exception>
        ///
        /// <example>
        ///
        ///   This example uses the ZipInputStream to read and extract entries from a
        ///   zip file, using a potentially different password for each entry.
        ///
        /// <code lang="C#">
        /// byte[] buffer= new byte[2048];
        /// int n;
        /// using (var raw = File.Open(_inputFileName, FileMode.Open, FileAccess.Read ))
        /// {
        ///     using (var input= new ZipInputStream(raw))
        ///     {
        ///         ZipEntry e;
        ///         while (( e = input.GetNextEntry()) != null)
        ///         {
        ///             input.Password = PasswordForEntry(e.FileName);
        ///             if (e.IsDirectory) continue;
        ///             string outputPath = Path.Combine(_extractDir, e.FileName);
        ///             using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
        ///             {
        ///                 while ((n= input.Read(buffer,0,buffer.Length)) > 0)
        ///                 {
        ///                     output.Write(buffer,0,n);
        ///                 }
        ///             }
        ///         }
        ///     }
        /// }
        /// </code>
        /// </example>
        public String Password
        {
            set
            {
                ThrowIfClosed();
                _Password = value;
            }
        }

        /// <summary>
        ///   Opens the reader for the current entry (using the current password)
        ///   and records how many bytes it will yield.
        /// </summary>
        private void SetupStream()
        {
            // Seek to the correct posn in the file, and open a
            // stream that can be read.
            _crcStream = _currentEntry.InternalOpenReader(_Password);
            _LeftToRead = _crcStream.Length;
            _needSetup = false;
        }

        /// <summary>
        ///   The underlying (wrapped) stream.
        /// </summary>
        internal Stream ReadStream
        {
            get
            {
                return _inputStream;
            }
        }

        /// <summary>
        ///   Read the data from the stream into the buffer.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   The data for the zipentry will be decrypted and uncompressed, as
        ///   necessary, before being copied into the buffer.
        /// </para>
        ///
        /// <para>
        ///   You must set the <see cref="Password"/> property before calling
        ///   <c>Read()</c> the first time for an encrypted entry.  To determine if an
        ///   entry is encrypted and requires a password, check the
        ///   <c>ZipEntry.Encryption</c> property.
        /// </para>
        ///
        /// <para>
        ///   When the last byte of the current entry has been read, the entry's CRC
        ///   is verified and the underlying stream is positioned at the end of the
        ///   entry.  This method returns 0 once the current entry is exhausted, and
        ///   also when no entry has been selected via <see cref="GetNextEntry()"/>.
        /// </para>
        /// </remarks>
        ///
        /// <param name="buffer">The buffer to hold the data read from the stream.</param>
        /// <param name="offset">the offset within the buffer to copy the first byte read.</param>
        /// <param name="count">the number of bytes to read.</param>
        /// <returns>the number of bytes read, after decryption and decompression.</returns>
        /// <exception cref="System.InvalidOperationException">
        ///   The stream has already been closed.
        /// </exception>
        public override int Read(byte[] buffer, int offset, int count)
        {
            ThrowIfClosed();

            if (_needSetup)
                SetupStream();

            if (_LeftToRead == 0) return 0;

            // Never ask the entry reader for more than the entry has left.
            int len = (_LeftToRead > count) ? count : (int)_LeftToRead;
            int n = _crcStream.Read(buffer, offset, len);

            _LeftToRead -= n;

            if (_LeftToRead == 0)
            {
                // Entry fully consumed: check its CRC, then park the underlying
                // stream where the next entry is expected to begin.
                int CrcResult = _crcStream.Crc;
                _currentEntry.VerifyCrcAfterExtract(CrcResult);
                _inputStream.Seek(_endOfEntry, SeekOrigin.Begin);
                // workitem 10178
                SharedUtilities.Workaround_Ladybug318918(_inputStream);
            }

            return n;
        }

        /// <summary>
        ///   Read the next entry from the zip file.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   Call this method just before calling <see cref="Read(byte[], int, int)"/>,
        ///   to position the pointer in the zip file to the next entry that can be
        ///   read.  Subsequent calls to <c>Read()</c>, will decrypt and decompress the
        ///   data in the zip file, until <c>Read()</c> returns 0.
        /// </para>
        ///
        /// <para>
        ///   Each time you call <c>GetNextEntry()</c>, the pointer in the wrapped
        ///   stream is moved to the next entry in the zip file.  If you call <see
        ///   cref="Seek(long, SeekOrigin)"/>, and thus re-position the pointer within
        ///   the file, you will need to call <c>GetNextEntry()</c> again, to ensure
        ///   that the file pointer is positioned at the beginning of a zip entry.
        /// </para>
        ///
        /// <para>
        ///   This method returns the <c>ZipEntry</c>. Using a stream approach, you will
        ///   read the raw bytes for an entry in a zip file via calls to <c>Read()</c>.
        ///   Alternatively, you can extract an entry into a file, or a stream, by
        ///   calling <c>ZipEntry.Extract()</c>, or one of its siblings.
        /// </para>
        /// </remarks>
        ///
        /// <returns>
        ///   The <c>ZipEntry</c> read. Returns null (or Nothing in VB) if there are no more
        ///   entries in the zip file.
        /// </returns>
        public ZipEntry GetNextEntry()
        {
            if (_findRequired)
            {
                // find the next signature
                long d = SharedUtilities.FindSignature(_inputStream, ZipConstants.ZipEntrySignature);
                if (d == -1) return null;
                // back up 4 bytes: ReadEntry assumes the file pointer is positioned before the entry signature
                _inputStream.Seek(-4, SeekOrigin.Current);
                // workitem 10178
                SharedUtilities.Workaround_Ladybug318918(_inputStream);
            }
            // workitem 10923
            else if (_firstEntry)
            {
                // we've already read one entry.
                // Seek to the end of it.
                _inputStream.Seek(_endOfEntry, SeekOrigin.Begin);
                SharedUtilities.Workaround_Ladybug318918(_inputStream);
            }

            // The second argument is true only for the very first entry read
            // (_firstEntry is still false at that point).
            _currentEntry = ZipEntry.ReadEntry(_container, !_firstEntry);
            // ReadEntry leaves the file position after all the entry
            // data and the optional bit-3 data descriptor.  This is
            // where the next entry would normally start.
            _endOfEntry = _inputStream.Position;
            _firstEntry = true;
            _needSetup = true;
            _findRequired = false;
            return _currentEntry;
        }

        /// <summary>
        ///   Dispose the stream.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   This method disposes the ZipInputStream.  It may also close the
        ///   underlying stream, depending on which constructor was used.
        /// </para>
        ///
        /// <para>
        ///   Typically the application will call <c>Dispose()</c> implicitly, via
        ///   a <c>using</c> statement in C#, or a <c>Using</c> statement in VB.
        /// </para>
        ///
        /// <para>
        ///   Application code won't call this code directly.  This method may
        ///   be invoked in two distinct scenarios.  If disposing == true, the
        ///   method has been called directly or indirectly by a user's code,
        ///   for example via the public Dispose() method. In this case, both
        ///   managed and unmanaged resources can be referenced and disposed.
        ///   If disposing == false, the method has been called by the runtime
        ///   from inside the object finalizer and this method should not
        ///   reference other objects; in that case only unmanaged resources
        ///   must be referenced or disposed.
        /// </para>
        ///
        /// <para>
        ///   Calling this method more than once has no additional effect.
        /// </para>
        /// </remarks>
        ///
        /// <param name="disposing">
        ///   true if the Dispose method was invoked by user code.
        /// </param>
        protected override void Dispose(bool disposing)
        {
            if (_closed) return;

            // Mark closed first so that a re-entrant or repeated call is a no-op.
            _closed = true;

            try
            {
                // Only touch the wrapped stream when not called from the finalizer.
                if (disposing && !_leaveUnderlyingStreamOpen)
                {
                    CloseStream(_inputStream);
                }
            }
            finally
            {
                base.Dispose(disposing);
            }
        }

        /// <summary>
        /// Always returns true.
        /// </summary>
        public override bool CanRead
        {
            get { return true; }
        }

        /// <summary>
        /// Returns the value of <c>CanSeek</c> for the underlying (wrapped) stream.
        /// </summary>
        public override bool CanSeek
        {
            get { return _inputStream.CanSeek; }
        }

        /// <summary>
        /// Always returns false.
        /// </summary>
        public override bool CanWrite
        {
            get { return false; }
        }

        /// <summary>
        /// Returns the length of the underlying stream.
        /// </summary>
        public override long Length
        {
            get { return _inputStream.Length; }
        }

        /// <summary>
        /// Gets or sets the position of the underlying stream.
        /// </summary>
        /// <remarks>
        /// Setting the position is equivalent to calling <c>Seek(value, SeekOrigin.Begin)</c>.
        /// </remarks>
        public override long Position
        {
            get { return _inputStream.Position; }
            set { Seek(value, SeekOrigin.Begin); }
        }

        /// <summary>
        /// This is a no-op.
        /// </summary>
        /// <remarks>
        /// This stream is read-only, so there is never anything to flush.
        /// </remarks>
        public override void Flush()
        {
        }

        /// <summary>
        /// This method always throws a NotSupportedException.
        /// </summary>
        /// <param name="buffer">ignored</param>
        /// <param name="offset">ignored</param>
        /// <param name="count">ignored</param>
        public override void Write(byte[] buffer, int offset, int count)
        {
            throw new NotSupportedException("Write");
        }

        /// <summary>
        ///   This method seeks in the underlying stream.
        /// </summary>
        ///
        /// <remarks>
        /// <para>
        ///   Call this method if you want to seek around within the zip file for random access.
        /// </para>
        ///
        /// <para>
        ///   Applications can intermix calls to <c>Seek()</c> with calls to <see
        ///   cref="GetNextEntry()"/>.  After a call to <c>Seek()</c>,
        ///   <c>GetNextEntry()</c> will get the next <c>ZipEntry</c> that falls after
        ///   the current position in the input stream. You're on your own for finding
        ///   out just where to seek in the stream, to get to the various entries.
        /// </para>
        /// </remarks>
        ///
        /// <param name="offset">the offset point to seek to</param>
        /// <param name="origin">the reference point from which to seek</param>
        /// <returns>The new position</returns>
        public override long Seek(long offset, SeekOrigin origin)
        {
            // The position is now arbitrary, so the next GetNextEntry() has to
            // scan forward for an entry signature.
            _findRequired = true;
            var x = _inputStream.Seek(offset, origin);
            // workitem 10178
            SharedUtilities.Workaround_Ladybug318918(_inputStream);
            return x;
        }

        /// <summary>
        /// This method always throws a NotSupportedException.
        /// </summary>
        /// <param name="value">ignored</param>
        public override void SetLength(long value)
        {
            throw new NotSupportedException();
        }

        // The wrapped stream that holds the zip data.
        private Stream _inputStream;
        private System.Text.Encoding _provisionalAlternateEncoding;
        // The entry most recently returned by GetNextEntry().
        private ZipEntry _currentEntry;
        // Despite the name: true once at least one entry has been read.
        private bool _firstEntry;
        // True when the reader for the current entry has not been opened yet.
        private bool _needSetup;
        private ZipContainer _container;
        private CrcCalculatorStream _crcStream;
        // Bytes of the current entry not yet returned by Read().
        private Int64 _LeftToRead;
        internal String _Password;
        // Position in the wrapped stream just past the current entry.
        private Int64 _endOfEntry;
        private string _name;
        private bool _leaveUnderlyingStreamOpen;
        private bool _closed;
        // True when GetNextEntry() must scan for the next entry signature.
        private bool _findRequired;
    }
}