View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  
19  package edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.archivers.zip;
20  
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  import java.nio.charset.UnsupportedCharsetException;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.Map;
27  
28  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.compress.utils.Charsets;
29  
30  /**
31   * Static helper functions for robustly encoding filenames in zip files. 
32   */
33  public abstract class ZipEncodingHelper {
34  
35      /**
36       * A class, which holds the high characters of a simple encoding
37       * and lazily instantiates a Simple8BitZipEncoding instance in a
38       * thread-safe manner.
39       */
40      private static class SimpleEncodingHolder {
41  
42          private final char [] highChars;
43          private Simple8BitZipEncoding encoding;
44  
45          /**
46           * Instantiate a simple encoding holder.
47           * 
48           * @param highChars The characters for byte codes 128 to 255.
49           * 
50           * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
51           */
52          SimpleEncodingHolder(char [] highChars) {
53              this.highChars = highChars;
54          }
55  
56          /**
57           * @return The associated {@link Simple8BitZipEncoding}, which
58           *         is instantiated if not done so far.
59           */
60          public synchronized Simple8BitZipEncoding getEncoding() {
61              if (this.encoding == null) {
62                  this.encoding = new Simple8BitZipEncoding(this.highChars);
63              }
64              return this.encoding;
65          }
66      }
67  
68      private static final Map<String, SimpleEncodingHolder> simpleEncodings;
69  
70      static {
71          Map<String, SimpleEncodingHolder> se =
72              new HashMap<String, SimpleEncodingHolder>();
73  
74          char[] cp437_high_chars =
75              new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
76                           0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
77                           0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
78                           0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
79                           0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
80                           0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
81                           0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
82                           0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
83                           0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
84                           0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
85                           0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
86                           0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
87                           0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
88                           0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
89                           0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
90                           0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
91                           0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
92                           0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
93                           0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
94                           0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
95                           0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
96                           0x25a0, 0x00a0 };
97  
98          SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
99  
100         se.put("CP437", cp437);
101         se.put("Cp437", cp437);
102         se.put("cp437", cp437);
103         se.put("IBM437", cp437);
104         se.put("ibm437", cp437);
105 
106         char[] cp850_high_chars =
107             new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
108                          0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
109                          0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
110                          0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
111                          0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
112                          0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
113                          0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
114                          0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
115                          0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
116                          0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
117                          0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
118                          0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
119                          0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
120                          0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
121                          0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
122                          0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
123                          0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
124                          0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
125                          0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
126                          0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
127                          0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
128                          0x25a0, 0x00a0 };
129 
130         SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
131 
132         se.put("CP850", cp850);
133         se.put("Cp850", cp850);
134         se.put("cp850", cp850);
135         se.put("IBM850", cp850);
136         se.put("ibm850", cp850);
137         simpleEncodings = Collections.unmodifiableMap(se);
138     }
139 
140     /**
141      * Grow a byte buffer, so it has a minimal capacity or at least
142      * the double capacity of the original buffer 
143      * 
144      * @param b The original buffer.
145      * @param newCapacity The minimal requested new capacity.
146      * @return A byte buffer <code>r</code> with
147      *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
148      *         all the data contained in <code>b</code> copied to the beginning
149      *         of <code>r</code>.
150      *
151      */
152     static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
153         b.limit(b.position());
154         b.rewind();
155 
156         int c2 = b.capacity() * 2;
157         ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
158 
159         on.put(b);
160         return on;
161     }
162 
163  
164     /**
165      * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
166      * ASCII bytes.
167      */
168     private static final byte[] HEX_DIGITS =
169         new byte [] {
170         0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
171         0x42, 0x43, 0x44, 0x45, 0x46
172     };
173 
174     /**
175      * Append <code>%Uxxxx</code> to the given byte buffer.
176      * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
177      * 
178      * @param bb The byte buffer to write to.
179      * @param c The character to write.
180      */
181     static void appendSurrogate(ByteBuffer bb, char c) {
182 
183         bb.put((byte) '%');
184         bb.put((byte) 'U');
185 
186         bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
187         bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
188         bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
189         bb.put(HEX_DIGITS[c & 0x0f]);
190     }
191 
192 
193     /**
194      * name of the encoding UTF-8
195      */
196     static final String UTF8 = "UTF8";
197 
198     /**
199      * name of the encoding UTF-8
200      */
201     static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
202 
203     /**
204      * Instantiates a zip encoding.
205      * 
206      * @param name The name of the zip encoding. Specify {@code null} for
207      *             the platform's default encoding.
208      * @return A zip encoding for the given encoding name.
209      */
210     public static ZipEncoding getZipEncoding(String name) {
211  
212         // fallback encoding is good enough for UTF-8.
213         if (isUTF8(name)) {
214             return UTF8_ZIP_ENCODING;
215         }
216 
217         if (name == null) {
218             return new FallbackZipEncoding();
219         }
220 
221         SimpleEncodingHolder h = simpleEncodings.get(name);
222 
223         if (h!=null) {
224             return h.getEncoding();
225         }
226 
227         try {
228 
229             Charset cs = Charset.forName(name);
230             return new NioZipEncoding(cs);
231 
232         } catch (UnsupportedCharsetException e) {
233             return new FallbackZipEncoding(name);
234         }
235     }
236 
237     /**
238      * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
239      * 
240      * @param charsetName
241      *            If the given name is null, then check the platform's default encoding.
242      */
243     static boolean isUTF8(String charsetName) {
244         if (charsetName == null) {
245             // check platform's default encoding
246             charsetName = System.getProperty("file.encoding");
247         }
248         if (Charsets.UTF_8.name().equalsIgnoreCase(charsetName)) {
249             return true;
250         }
251         for (String alias : Charsets.UTF_8.aliases()) {
252             if (alias.equalsIgnoreCase(charsetName)) {
253                 return true;
254             }
255         }
256         return false;
257     }
258 }