View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  
19  /*
20   * This package is based on the work done by Timothy Gerard Endres
21   * (time@ice.com) to whom the Ant project is very grateful for his great code.
22   */
23  
24  package edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.tar;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.util.HashMap;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  
33  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.ArchiveEntry;
34  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.ArchiveInputStream;
35  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.zip.ZipEncoding;
36  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
37  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.utils.ArchiveUtils;
38  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.utils.CharsetNames;
39  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.utils.IOUtils;
40  
41  /**
42   * The TarInputStream reads a UNIX tar archive as an InputStream.
43   * Methods are provided to position at each successive entry in
44   * the archive, and then read each entry as a normal input stream
45   * using read().
46   * @NotThreadSafe
47   */
48  public class TarArchiveInputStream extends ArchiveInputStream {
49  
50      private static final int SMALL_BUFFER_SIZE = 256;
51  
52      private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];
53  
54      /** The size the TAR header */
55      private final int recordSize;
56  
57      /** The size of a block */
58      private final int blockSize;
59  
60      /** True if file has hit EOF */
61      private boolean hasHitEOF;
62  
63      /** Size of the current entry */
64      private long entrySize;
65  
66      /** How far into the entry the stream is at */
67      private long entryOffset;
68  
69      /** An input stream to read from */
70      private final InputStream is;
71  
72      /** The meta-data about the current entry */
73      private TarArchiveEntry currEntry;
74  
75      /** The encoding of the file */
76      private final ZipEncoding encoding;
77  
78      /**
79       * Constructor for TarInputStream.
80       * @param is the input stream to use
81       */
82      public TarArchiveInputStream(InputStream is) {
83          this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
84      }
85  
86      /**
87       * Constructor for TarInputStream.
88       * @param is the input stream to use
89       * @param encoding name of the encoding to use for file names
90       * @since 1.4
91       */
92      public TarArchiveInputStream(InputStream is, String encoding) {
93          this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
94               encoding);
95      }
96  
97      /**
98       * Constructor for TarInputStream.
99       * @param is the input stream to use
100      * @param blockSize the block size to use
101      */
102     public TarArchiveInputStream(InputStream is, int blockSize) {
103         this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
104     }
105 
106     /**
107      * Constructor for TarInputStream.
108      * @param is the input stream to use
109      * @param blockSize the block size to use
110      * @param encoding name of the encoding to use for file names
111      * @since 1.4
112      */
113     public TarArchiveInputStream(InputStream is, int blockSize,
114                                  String encoding) {
115         this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
116     }
117 
118     /**
119      * Constructor for TarInputStream.
120      * @param is the input stream to use
121      * @param blockSize the block size to use
122      * @param recordSize the record size to use
123      */
124     public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
125         this(is, blockSize, recordSize, null);      
126     }
127 
128     /**
129      * Constructor for TarInputStream.
130      * @param is the input stream to use
131      * @param blockSize the block size to use
132      * @param recordSize the record size to use
133      * @param encoding name of the encoding to use for file names
134      * @since 1.4
135      */
136     public TarArchiveInputStream(InputStream is, int blockSize, int recordSize,
137                                  String encoding) {
138         this.is = is;
139         this.hasHitEOF = false;
140         this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
141         this.recordSize = recordSize;
142         this.blockSize = blockSize;
143     }
144 
145     /**
146      * Closes this stream. Calls the TarBuffer's close() method.
147      * @throws IOException on error
148      */
149     @Override
150     public void close() throws IOException {
151         is.close();
152     }
153 
154     /**
155      * Get the record size being used by this stream's buffer.
156      *
157      * @return The TarBuffer record size.
158      */
159     public int getRecordSize() {
160         return recordSize;
161     }
162 
163     /**
164      * Get the available data that can be read from the current
165      * entry in the archive. This does not indicate how much data
166      * is left in the entire archive, only in the current entry.
167      * This value is determined from the entry's size header field
168      * and the amount of data already read from the current entry.
169      * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
170      * bytes are left in the current entry in the archive.
171      *
172      * @return The number of available bytes for the current entry.
173      * @throws IOException for signature
174      */
175     @Override
176     public int available() throws IOException {
177         if (entrySize - entryOffset > Integer.MAX_VALUE) {
178             return Integer.MAX_VALUE;
179         }
180         return (int) (entrySize - entryOffset);
181     }
182 
183     
184     /**
185      * Skips over and discards <code>n</code> bytes of data from this input
186      * stream. The <code>skip</code> method may, for a variety of reasons, end
187      * up skipping over some smaller number of bytes, possibly <code>0</code>.
188      * This may result from any of a number of conditions; reaching end of file
189      * or end of entry before <code>n</code> bytes have been skipped; are only
190      * two possibilities. The actual number of bytes skipped is returned. If
191      * <code>n</code> is negative, no bytes are skipped.
192      * 
193      * 
194      * @param n
195      *            the number of bytes to be skipped.
196      * @return the actual number of bytes skipped.
197      * @exception IOException
198      *                if some other I/O error occurs.
199      */
200     @Override
201     public long skip(final long n) throws IOException {
202         if (n <= 0) {
203             return 0;
204         }
205 
206         final long available = entrySize - entryOffset;
207         final long skipped = is.skip(Math.min(n, available)); 
208         count(skipped);
209         entryOffset += skipped;
210         return skipped;
211     }
212 
213     /**
214      * Since we do not support marking just yet, we return false.
215      *
216      * @return False.
217      */
218     @Override
219     public boolean markSupported() {
220         return false;
221     }
222 
223     /**
224      * Since we do not support marking just yet, we do nothing.
225      *
226      * @param markLimit The limit to mark.
227      */
228     @Override
229     public void mark(int markLimit) {
230     }
231 
232     /**
233      * Since we do not support marking just yet, we do nothing.
234      */
235     @Override
236     public synchronized void reset() {
237     }
238 
239     /**
240      * Get the next entry in this tar archive. This will skip
241      * over any remaining data in the current entry, if there
242      * is one, and place the input stream at the header of the
243      * next entry, and read the header and instantiate a new
244      * TarEntry from the header bytes and return that entry.
245      * If there are no more entries in the archive, null will
246      * be returned to indicate that the end of the archive has
247      * been reached.
248      *
249      * @return The next TarEntry in the archive, or null.
250      * @throws IOException on error
251      */
252     public TarArchiveEntry getNextTarEntry() throws IOException {
253         if (hasHitEOF) {
254             return null;
255         }
256 
257         if (currEntry != null) {
258             /* Skip will only go to the end of the current entry */
259             IOUtils.skip(this, Long.MAX_VALUE);
260 
261             /* skip to the end of the last record */
262             skipRecordPadding();
263         }
264 
265         byte[] headerBuf = getRecord();
266 
267         if (headerBuf == null) {
268             /* hit EOF */
269             currEntry = null;
270             return null;
271         }
272 
273         try {
274             currEntry = new TarArchiveEntry(headerBuf, encoding);
275         } catch (IllegalArgumentException e) {
276             IOException ioe = new IOException("Error detected parsing the header");
277             ioe.initCause(e);
278             throw ioe;
279         }
280 
281         entryOffset = 0;
282         entrySize = currEntry.getSize();
283 
284         if (currEntry.isGNULongLinkEntry()) {
285             byte[] longLinkData = getLongNameData();
286             if (longLinkData == null) {
287                 // Bugzilla: 40334
288                 // Malformed tar file - long link entry name not followed by
289                 // entry
290                 return null;
291             }
292             currEntry.setLinkName(encoding.decode(longLinkData));
293         }
294 
295         if (currEntry.isGNULongNameEntry()) {
296             byte[] longNameData = getLongNameData();
297             if (longNameData == null) {
298                 // Bugzilla: 40334
299                 // Malformed tar file - long entry name not followed by
300                 // entry
301                 return null;
302             }
303             currEntry.setName(encoding.decode(longNameData));
304         }
305 
306         if (currEntry.isPaxHeader()){ // Process Pax headers
307             paxHeaders();
308         }
309 
310         if (currEntry.isGNUSparse()){ // Process sparse files
311             readGNUSparse();
312         }
313 
314         // If the size of the next element in the archive has changed
315         // due to a new size being reported in the posix header
316         // information, we update entrySize here so that it contains
317         // the correct value.
318         entrySize = currEntry.getSize();
319 
320         return currEntry;
321     }
322     
323     /**
324      * The last record block should be written at the full size, so skip any
325      * additional space used to fill a record after an entry
326      */
327     private void skipRecordPadding() throws IOException {
328         if (this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
329             long numRecords = (this.entrySize / this.recordSize) + 1;
330             long padding = (numRecords * this.recordSize) - this.entrySize;
331             long skipped = IOUtils.skip(is, padding);
332             count(skipped);
333         }
334     }
335 
336     /**
337      * Get the next entry in this tar archive as longname data.
338      *
339      * @return The next entry in the archive as longname data, or null.
340      * @throws IOException on error
341      */
342     protected byte[] getLongNameData() throws IOException {
343         // read in the name
344         ByteArrayOutputStream longName = new ByteArrayOutputStream();
345         int length = 0;
346         while ((length = read(SMALL_BUF)) >= 0) {
347             longName.write(SMALL_BUF, 0, length);
348         }
349         getNextEntry();
350         if (currEntry == null) {
351             // Bugzilla: 40334
352             // Malformed tar file - long entry name not followed by entry
353             return null;
354         }
355         byte[] longNameData = longName.toByteArray();
356         // remove trailing null terminator(s)
357         length = longNameData.length;
358         while (length > 0 && longNameData[length - 1] == 0) {
359             --length;
360         }
361         if (length != longNameData.length) {
362             byte[] l = new byte[length];
363             System.arraycopy(longNameData, 0, l, 0, length);
364             longNameData = l;
365         }
366         return longNameData;
367     }
368 
369     /**
370      * Get the next record in this tar archive. This will skip
371      * over any remaining data in the current entry, if there
372      * is one, and place the input stream at the header of the
373      * next entry.
374      *
375      * <p>If there are no more entries in the archive, null will be
376      * returned to indicate that the end of the archive has been
377      * reached.  At the same time the {@code hasHitEOF} marker will be
378      * set to true.</p>
379      *
380      * @return The next header in the archive, or null.
381      * @throws IOException on error
382      */
383     private byte[] getRecord() throws IOException {
384         byte[] headerBuf = readRecord();
385         hasHitEOF = isEOFRecord(headerBuf);
386         if (hasHitEOF && headerBuf != null) {
387             tryToConsumeSecondEOFRecord();
388             consumeRemainderOfLastBlock();
389             headerBuf = null;
390         }
391         return headerBuf;
392     }
393 
394     /**
395      * Determine if an archive record indicate End of Archive. End of
396      * archive is indicated by a record that consists entirely of null bytes.
397      *
398      * @param record The record data to check.
399      * @return true if the record data is an End of Archive
400      */
401     protected boolean isEOFRecord(byte[] record) {
402         return record == null || ArchiveUtils.isArrayZero(record, recordSize);
403     }
404     
405     /**
406      * Read a record from the input stream and return the data.
407      *
408      * @return The record data or null if EOF has been hit.
409      * @throws IOException on error
410      */
411     protected byte[] readRecord() throws IOException {
412 
413         byte[] record = new byte[recordSize];
414 
415         int readNow = IOUtils.readFully(is, record);
416         count(readNow);
417         if (readNow != recordSize) {
418             return null;
419         }
420 
421         return record;
422     }
423 
424     private void paxHeaders() throws IOException{
425         Map<String, String> headers = parsePaxHeaders(this);
426         getNextEntry(); // Get the actual file entry
427         applyPaxHeadersToCurrentEntry(headers);
428     }
429 
430     Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
431         Map<String, String> headers = new HashMap<String, String>();
432         // Format is "length keyword=value\n";
433         while(true){ // get length
434             int ch;
435             int len = 0;
436             int read = 0;
437             while((ch = i.read()) != -1) {
438                 read++;
439                 if (ch == ' '){ // End of length string
440                     // Get keyword
441                     ByteArrayOutputStream coll = new ByteArrayOutputStream();
442                     while((ch = i.read()) != -1) {
443                         read++;
444                         if (ch == '='){ // end of keyword
445                             String keyword = coll.toString(CharsetNames.UTF_8);
446                             // Get rest of entry
447                             final int restLen = len - read;
448                             byte[] rest = new byte[restLen];
449                             int got = IOUtils.readFully(i, rest);
450                             if (got != restLen) {
451                                 throw new IOException("Failed to read "
452                                                       + "Paxheader. Expected "
453                                                       + restLen
454                                                       + " bytes, read "
455                                                       + got);
456                             }
457                             // Drop trailing NL
458                             String value = new String(rest, 0,
459                                                       restLen - 1, CharsetNames.UTF_8);
460                             headers.put(keyword, value);
461                             break;
462                         }
463                         coll.write((byte) ch);
464                     }
465                     break; // Processed single header
466                 }
467                 len *= 10;
468                 len += ch - '0';
469             }
470             if (ch == -1){ // EOF
471                 break;
472             }
473         }
474         return headers;
475     }
476 
477     private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
478         /*
479          * The following headers are defined for Pax.
480          * atime, ctime, charset: cannot use these without changing TarArchiveEntry fields
481          * mtime
482          * comment
483          * gid, gname
484          * linkpath
485          * size
486          * uid,uname
487          * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those
488          */
489         for (Entry<String, String> ent : headers.entrySet()){
490             String key = ent.getKey();
491             String val = ent.getValue();
492             if ("path".equals(key)){
493                 currEntry.setName(val);
494             } else if ("linkpath".equals(key)){
495                 currEntry.setLinkName(val);
496             } else if ("gid".equals(key)){
497                 currEntry.setGroupId(Integer.parseInt(val));
498             } else if ("gname".equals(key)){
499                 currEntry.setGroupName(val);
500             } else if ("uid".equals(key)){
501                 currEntry.setUserId(Integer.parseInt(val));
502             } else if ("uname".equals(key)){
503                 currEntry.setUserName(val);
504             } else if ("size".equals(key)){
505                 currEntry.setSize(Long.parseLong(val));
506             } else if ("mtime".equals(key)){
507                 currEntry.setModTime((long) (Double.parseDouble(val) * 1000));
508             } else if ("SCHILY.devminor".equals(key)){
509                 currEntry.setDevMinor(Integer.parseInt(val));
510             } else if ("SCHILY.devmajor".equals(key)){
511                 currEntry.setDevMajor(Integer.parseInt(val));
512             }
513         }
514     }
515 
516     /**
517      * Adds the sparse chunks from the current entry to the sparse chunks,
518      * including any additional sparse entries following the current entry.
519      *
520      * @throws IOException on error
521      *
522      * @todo Sparse files get not yet really processed.
523      */
524     private void readGNUSparse() throws IOException {
525         /* we do not really process sparse files yet
526         sparses = new ArrayList();
527         sparses.addAll(currEntry.getSparses());
528         */
529         if (currEntry.isExtended()) {
530             TarArchiveSparseEntry entry;
531             do {
532                 byte[] headerBuf = getRecord();
533                 if (headerBuf == null) {
534                     currEntry = null;
535                     break;
536                 }
537                 entry = new TarArchiveSparseEntry(headerBuf);
538                 /* we do not really process sparse files yet
539                 sparses.addAll(entry.getSparses());
540                 */
541             } while (entry.isExtended());
542         }
543     }
544 
545     /**
546      * Returns the next Archive Entry in this Stream.
547      *
548      * @return the next entry,
549      *         or {@code null} if there are no more entries
550      * @throws IOException if the next entry could not be read
551      */
552     @Override
553     public ArchiveEntry getNextEntry() throws IOException {
554         return getNextTarEntry();
555     }
556     
557     /**
558      * Tries to read the next record rewinding the stream if it is not a EOF record.
559      *
560      * <p>This is meant to protect against cases where a tar
561      * implementation has written only one EOF record when two are
562      * expected.  Actually this won't help since a non-conforming
563      * implementation likely won't fill full blocks consisting of - by
564      * default - ten records either so we probably have already read
565      * beyond the archive anyway.</p>
566      */
567     private void tryToConsumeSecondEOFRecord() throws IOException {
568         boolean shouldReset = true;
569         boolean marked = is.markSupported();
570         if (marked) {
571             is.mark(recordSize);
572         }
573         try {
574             shouldReset = !isEOFRecord(readRecord());
575         } finally {
576             if (shouldReset && marked) {
577                 pushedBackBytes(recordSize);
578             	is.reset();
579             }
580         }
581     }
582 
583     /**
584      * Reads bytes from the current tar archive entry.
585      *
586      * This method is aware of the boundaries of the current
587      * entry in the archive and will deal with them as if they
588      * were this stream's start and EOF.
589      *
590      * @param buf The buffer into which to place bytes read.
591      * @param offset The offset at which to place bytes read.
592      * @param numToRead The number of bytes to read.
593      * @return The number of bytes read, or -1 at EOF.
594      * @throws IOException on error
595      */
596     @Override
597     public int read(byte[] buf, int offset, int numToRead) throws IOException {
598     	int totalRead = 0;
599 
600         if (hasHitEOF || entryOffset >= entrySize) {
601             return -1;
602         }
603 
604         if (currEntry == null) {
605             throw new IllegalStateException("No current tar entry");
606         }
607 
608         numToRead = Math.min(numToRead, available());
609         
610         totalRead = is.read(buf, offset, numToRead);
611         
612         if (totalRead == -1) {
613             if (numToRead > 0) {
614                 throw new IOException("Truncated TAR archive");
615             }
616             hasHitEOF = true;
617         } else {
618             count(totalRead);
619             entryOffset += totalRead;
620         }
621 
622         return totalRead;
623     }
624 
625     /**
626      * Whether this class is able to read the given entry.
627      *
628      * <p>May return false if the current entry is a sparse file.</p>
629      */
630     @Override
631     public boolean canReadEntryData(ArchiveEntry ae) {
632         if (ae instanceof TarArchiveEntry) {
633             TarArchiveEntry/../../../../../../../../edu/internet2/middleware/grouperInstallerExt/org/apache/commons/compress/archivers/tar/TarArchiveEntry.html#TarArchiveEntry">TarArchiveEntry te = (TarArchiveEntry) ae;
634             return !te.isGNUSparse();
635         }
636         return false;
637     }
638 
639     /**
640      * Get the current TAR Archive Entry that this input stream is processing
641      * 
642      * @return The current Archive Entry
643      */
644     public TarArchiveEntry getCurrentEntry() {
645         return currEntry;
646     }
647 
648     protected final void setCurrentEntry(TarArchiveEntry e) {
649         currEntry = e;
650     }
651 
652     protected final boolean isAtEOF() {
653         return hasHitEOF;
654     }
655 
656     protected final void setAtEOF(boolean b) {
657         hasHitEOF = b;
658     }
659 
660     /**
661      * This method is invoked once the end of the archive is hit, it
662      * tries to consume the remaining bytes under the assumption that
663      * the tool creating this archive has padded the last block.
664      */
665     private void consumeRemainderOfLastBlock() throws IOException {
666         long bytesReadOfLastBlock = getBytesRead() % blockSize;
667         if (bytesReadOfLastBlock > 0) {
668             long skipped = IOUtils.skip(is, blockSize - bytesReadOfLastBlock);
669             count(skipped);
670         }
671     }
672 
673     /**
674      * Checks if the signature matches what is expected for a tar file.
675      *
676      * @param signature
677      *            the bytes to check
678      * @param length
679      *            the number of bytes to check
680      * @return true, if this stream is a tar archive stream, false otherwise
681      */
682     public static boolean matches(byte[] signature, int length) {
683         if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
684             return false;
685         }
686 
687         if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
688                 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
689             &&
690             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
691                 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
692                 ){
693             return true;
694         }
695         if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
696                 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
697             &&
698             (
699              ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
700                 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
701             ||
702             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
703                 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
704             )
705                 ){
706             return true;
707         }
708         // COMPRESS-107 - recognise Ant tar files
709         if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
710                 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
711             &&
712             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
713                 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
714                 ){
715             return true;
716         }
717         return false;
718     }
719 
720 }