/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
 * the InputStream. In order to add other implementations, you should extend
 * ArchiveStreamFactory and override the appropriate methods (and call their
 * implementation from super of course).
 *
 * Compressing a ZIP-File:
 *
 * <pre>
 * final OutputStream out = new FileOutputStream(output);
 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
 *
 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
 * IOUtils.copy(new FileInputStream(file1), os);
 * os.closeArchiveEntry();
 *
 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
 * IOUtils.copy(new FileInputStream(file2), os);
 * os.closeArchiveEntry();
 * os.close();
 * </pre>
 *
 * Decompressing a ZIP-File:
 *
 * <pre>
 * final InputStream is = new FileInputStream(input);
 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
 * OutputStream out = new FileOutputStream(new File(dir, entry.getName()));
 * IOUtils.copy(in, out);
 * out.close();
 * in.close();
 * </pre>
 * @Immutable provided that the deprecated method setEntryEncoding is not used.
 * @ThreadSafe even if the deprecated method setEntryEncoding is used
 */
public class ArchiveStreamFactory {

    /**
     * Constant (value {@value}) used to identify the AR archive format.
     * @since 1.1
     */
    public static final String AR = "ar";
    /**
     * Constant (value {@value}) used to identify the ARJ archive format.
     * Not supported as an output stream type.
     * @since 1.6
     */
    public static final String ARJ = "arj";
    /**
     * Constant (value {@value}) used to identify the CPIO archive format.
     * @since 1.1
     */
    public static final String CPIO = "cpio";
    /**
     * Constant (value {@value}) used to identify the Unix DUMP archive format.
     * Not supported as an output stream type.
     * @since 1.3
     */
    public static final String DUMP = "dump";
    /**
     * Constant (value {@value}) used to identify the JAR archive format.
     * @since 1.1
     */
    public static final String JAR = "jar";
    /**
     * Constant (value {@value}) used to identify the TAR archive format.
     * @since 1.1
     */
    public static final String TAR = "tar";
    /**
     * Constant (value {@value}) used to identify the ZIP archive format.
     * @since 1.1
     */
    public static final String ZIP = "zip";
    /**
     * Constant (value {@value}) used to identify the 7z archive format.
     * Streaming is not supported for this format: the factory methods of this
     * class throw {@link StreamingNotSupportedException} when it is requested
     * or detected.
     * @since 1.8
     */
    public static final String SEVEN_Z = "7z";

    /**
     * Encoding passed to the constructor, null for the platform default.
     * Only consulted to decide whether {@link #setEntryEncoding(String)}
     * is still allowed to change the effective encoding.
     */
    private final String encoding;

    /**
     * Effective entry encoding used when creating streams, null for the
     * default. Initialised from the constructor argument and afterwards only
     * changed by the deprecated {@link #setEntryEncoding(String)}.
     */
    private volatile String entryEncoding = null;

    /**
     * Create an instance using the platform default encoding.
     */
    public ArchiveStreamFactory() {
        this(null);
    }

    /**
     * Create an instance using the specified encoding.
     *
     * @param encoding the encoding to be used.
     *
     * @since 1.10
     */
    public ArchiveStreamFactory(String encoding) {
        super();
        this.encoding = encoding;
        // Also set the original field so can continue to use it.
        this.entryEncoding = encoding;
    }

    /**
     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
     * files, or null for the archiver default.
     *
     * @return entry encoding, or null for the archiver default
     * @since 1.5
     */
    public String getEntryEncoding() {
        return entryEncoding;
    }

    /**
     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
     *
     * @param entryEncoding the entry encoding, null uses the archiver default.
     * @since 1.5
     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
     * was used to specify the factory encoding.
     */
    @Deprecated
    public void setEntryEncoding(String entryEncoding) {
        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
        if (encoding != null) {
            throw new IllegalStateException("Cannot overide encoding set by the constructor");
        }
        this.entryEncoding = entryEncoding;
    }

    /**
     * Create an archive input stream from an archiver name and an input stream.
     * The archiver name is matched case-insensitively.
     *
     * @param archiverName the archive name,
     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
     * @param in the input stream
     * @return the archive input stream
     * @throws ArchiveException if the archiver name is not known
     * @throws StreamingNotSupportedException if the format cannot be
     * read from a stream
     * @throws IllegalArgumentException if the archiver name or stream is null
     */
    public ArchiveInputStream createArchiveInputStream(
            final String archiverName, final InputStream in)
            throws ArchiveException {

        if (archiverName == null) {
            throw new IllegalArgumentException("Archivername must not be null.");
        }

        if (in == null) {
            throw new IllegalArgumentException("InputStream must not be null.");
        }

        // Read the volatile field exactly once so that the null check and the
        // constructor call below always see the same value.
        final String enc = entryEncoding;

        if (AR.equalsIgnoreCase(archiverName)) {
            return new ArArchiveInputStream(in);
        }
        if (ARJ.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new ArjArchiveInputStream(in, enc);
            }
            return new ArjArchiveInputStream(in);
        }
        if (ZIP.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new ZipArchiveInputStream(in, enc);
            }
            return new ZipArchiveInputStream(in);
        }
        if (TAR.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new TarArchiveInputStream(in, enc);
            }
            return new TarArchiveInputStream(in);
        }
        if (JAR.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new JarArchiveInputStream(in, enc);
            }
            return new JarArchiveInputStream(in);
        }
        if (CPIO.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new CpioArchiveInputStream(in, enc);
            }
            return new CpioArchiveInputStream(in);
        }
        if (DUMP.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new DumpArchiveInputStream(in, enc);
            }
            return new DumpArchiveInputStream(in);
        }
        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
            throw new StreamingNotSupportedException(SEVEN_Z);
        }

        throw new ArchiveException("Archiver: " + archiverName + " not found.");
    }

    /**
     * Create an archive output stream from an archiver name and an output stream.
     * The archiver name is matched case-insensitively.
     *
     * @param archiverName the archive name,
     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
     * @param out the output stream
     * @return the archive output stream
     * @throws ArchiveException if the archiver name is not known
     * @throws StreamingNotSupportedException if the format cannot be
     * written to a stream
     * @throws IllegalArgumentException if the archiver name or stream is null
     */
    public ArchiveOutputStream createArchiveOutputStream(
            final String archiverName, final OutputStream out)
            throws ArchiveException {
        if (archiverName == null) {
            throw new IllegalArgumentException("Archivername must not be null.");
        }
        if (out == null) {
            throw new IllegalArgumentException("OutputStream must not be null.");
        }

        // Read the volatile field exactly once so that the null check and the
        // use below always see the same value.
        final String enc = entryEncoding;

        if (AR.equalsIgnoreCase(archiverName)) {
            return new ArArchiveOutputStream(out);
        }
        if (ZIP.equalsIgnoreCase(archiverName)) {
            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
            if (enc != null) {
                zip.setEncoding(enc);
            }
            return zip;
        }
        if (TAR.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new TarArchiveOutputStream(out, enc);
            }
            return new TarArchiveOutputStream(out);
        }
        if (JAR.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new JarArchiveOutputStream(out, enc);
            }
            return new JarArchiveOutputStream(out);
        }
        if (CPIO.equalsIgnoreCase(archiverName)) {
            if (enc != null) {
                return new CpioArchiveOutputStream(out, enc);
            }
            return new CpioArchiveOutputStream(out);
        }
        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
            throw new StreamingNotSupportedException(SEVEN_Z);
        }
        throw new ArchiveException("Archiver: " + archiverName + " not found.");
    }

    /**
     * Create an archive input stream from an input stream, autodetecting
     * the archive type from the first few bytes of the stream. The InputStream
     * must support marks, like BufferedInputStream.
     *
     * <p>Detection reads up to 12 bytes for the zip, jar, ar, cpio, arj and
     * 7z signatures, then up to 32 bytes for dump and finally up to 512 bytes
     * (one tar block) for tar; the stream is reset after each probe.</p>
     *
     * @param in the input stream
     * @return the archive input stream
     * @throws ArchiveException if no archiver matches the stream signature
     * or if an I/O error occurs while probing the stream
     * @throws StreamingNotSupportedException if the format cannot be
     * read from a stream
     * @throws IllegalArgumentException if the stream is null or does not support mark
     */
    public ArchiveInputStream createArchiveInputStream(final InputStream in)
            throws ArchiveException {
        if (in == null) {
            throw new IllegalArgumentException("Stream must not be null.");
        }

        if (!in.markSupported()) {
            throw new IllegalArgumentException("Mark is not supported.");
        }

        // Read the volatile field exactly once so every branch below, including
        // the tar checksum fallback, uses the same effective encoding.
        final String enc = entryEncoding;

        final byte[] signature = new byte[12];
        in.mark(signature.length);
        try {
            int signatureLength = IOUtils.readFully(in, signature);
            in.reset();
            if (ZipArchiveInputStream.matches(signature, signatureLength)) {
                if (enc != null) {
                    return new ZipArchiveInputStream(in, enc);
                }
                return new ZipArchiveInputStream(in);
            } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
                if (enc != null) {
                    return new JarArchiveInputStream(in, enc);
                }
                return new JarArchiveInputStream(in);
            } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
                return new ArArchiveInputStream(in);
            } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
                if (enc != null) {
                    return new CpioArchiveInputStream(in, enc);
                }
                return new CpioArchiveInputStream(in);
            } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
                if (enc != null) {
                    return new ArjArchiveInputStream(in, enc);
                }
                return new ArjArchiveInputStream(in);
            } else if (SevenZFile.matches(signature, signatureLength)) {
                throw new StreamingNotSupportedException(SEVEN_Z);
            }

            // Dump needs a bigger buffer to check the signature;
            final byte[] dumpsig = new byte[32];
            in.mark(dumpsig.length);
            signatureLength = IOUtils.readFully(in, dumpsig);
            in.reset();
            if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
                return new DumpArchiveInputStream(in, enc);
            }

            // Tar needs an even bigger buffer to check the signature; read the first block
            final byte[] tarheader = new byte[512];
            in.mark(tarheader.length);
            signatureLength = IOUtils.readFully(in, tarheader);
            in.reset();
            if (TarArchiveInputStream.matches(tarheader, signatureLength)) {
                return new TarArchiveInputStream(in, enc);
            }
            // COMPRESS-117 - improve auto-recognition
            if (signatureLength >= 512) {
                TarArchiveInputStream tais = null;
                try {
                    // Parse the buffered first block on its own; the real stream is untouched.
                    tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader));
                    // COMPRESS-191 - verify the header checksum
                    if (tais.getNextTarEntry().isCheckSumOK()) {
                        // Use the effective entry encoding (not the constructor-only
                        // field) so an encoding set via setEntryEncoding is honoured,
                        // consistent with the signature-based tar branch above.
                        return new TarArchiveInputStream(in, enc);
                    }
                } catch (Exception e) { // NOPMD
                    // can generate IllegalArgumentException as well
                    // as IOException
                    // autodetection, simply not a TAR
                    // ignored
                } finally {
                    IOUtils.closeQuietly(tais);
                }
            }
        } catch (IOException e) {
            throw new ArchiveException("Could not use reset and mark operations.", e);
        }

        throw new ArchiveException("No Archiver found for the stream signature");
    }

}