View Javadoc
1   /*******************************************************************************
2    * Copyright 2012 Internet2
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   ******************************************************************************/
16  /*
17   * Copyright 2001-2004 The Apache Software Foundation.
18   * 
19   * Licensed under the Apache License, Version 2.0 (the "License");
20   * you may not use this file except in compliance with the License.
21   * You may obtain a copy of the License at
22   * 
23   *      http://www.apache.org/licenses/LICENSE-2.0
24   * 
25   * Unless required by applicable law or agreed to in writing, software
26   * distributed under the License is distributed on an "AS IS" BASIS,
27   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28   * See the License for the specific language governing permissions and
29   * limitations under the License.
30   */ 
31  
32  package edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.net;
33  
34  import java.io.ByteArrayOutputStream;
35  import java.io.UnsupportedEncodingException;
36  import java.util.BitSet;
37  
38  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.BinaryDecoder;
39  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.BinaryEncoder;
40  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.DecoderException;
41  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.EncoderException;
42  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.StringDecoder;
43  import edu.internet2.middleware.grouperInstallerExt.org.apache.commons.codec.StringEncoder;
44  
45  /**
46   * <p>
47   * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
48   * </p>
49   * <p>
50   * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
51   * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
52   * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
53   * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
54   * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
55   * gateway.
56   * </p>
57   * 
58   * <p>
59   * Note:
60   * </p>
61   * <p>
62   * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
63   * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
64   * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
65   * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
66   * </p>
67   * 
68   * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
69   *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
70   * 
71   * @author Apache Software Foundation
72   * @since 1.3
73   * @version $Id: QuotedPrintableCodec.java,v 1.1 2008-11-30 10:57:27 mchyzer Exp $
74   */
75  public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
76      /**
77       * The default charset used for string decoding and encoding.
78       */
79      private String charset = StringEncodings.UTF8;
80  
81      /**
82       * BitSet of printable characters as defined in RFC 1521.
83       */
84      private static final BitSet PRINTABLE_CHARS = new BitSet(256);
85  
86      private static byte ESCAPE_CHAR = '=';
87  
88      private static byte TAB = 9;
89  
90      private static byte SPACE = 32;
91      // Static initializer for printable chars collection
92      static {
93          // alpha characters
94          for (int i = 33; i <= 60; i++) {
95              PRINTABLE_CHARS.set(i);
96          }
97          for (int i = 62; i <= 126; i++) {
98              PRINTABLE_CHARS.set(i);
99          }
100         PRINTABLE_CHARS.set(TAB);
101         PRINTABLE_CHARS.set(SPACE);
102     }
103 
104     /**
105      * Default constructor.
106      */
107     public QuotedPrintableCodec() {
108         super();
109     }
110 
111     /**
112      * Constructor which allows for the selection of a default charset
113      * 
114      * @param charset
115      *                  the default string charset to use.
116      */
117     public QuotedPrintableCodec(String charset) {
118         super();
119         this.charset = charset;
120     }
121 
122     /**
123      * Encodes byte into its quoted-printable representation.
124      * 
125      * @param b
126      *                  byte to encode
127      * @param buffer
128      *                  the buffer to write to
129      */
130     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
131         buffer.write(ESCAPE_CHAR);
132         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
133         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
134         buffer.write(hex1);
135         buffer.write(hex2);
136     }
137 
138     /**
139      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
140      * 
141      * <p>
142      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
143      * RFC 1521 and is suitable for encoding binary data and unformatted text.
144      * </p>
145      * 
146      * @param printable
147      *                  bitset of characters deemed quoted-printable
148      * @param bytes
149      *                  array of bytes to be encoded
150      * @return array of bytes containing quoted-printable data
151      */
152     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
153         if (bytes == null) {
154             return null;
155         }
156         if (printable == null) {
157             printable = PRINTABLE_CHARS;
158         }
159         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
160         for (int i = 0; i < bytes.length; i++) {
161             int b = bytes[i];
162             if (b < 0) {
163                 b = 256 + b;
164             }
165             if (printable.get(b)) {
166                 buffer.write(b);
167             } else {
168                 encodeQuotedPrintable(b, buffer);
169             }
170         }
171         return buffer.toByteArray();
172     }
173 
174     /**
175      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
176      * back to their original representation.
177      * 
178      * <p>
179      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
180      * RFC 1521.
181      * </p>
182      * 
183      * @param bytes
184      *                  array of quoted-printable characters
185      * @return array of original bytes
186      * @throws DecoderException
187      *                  Thrown if quoted-printable decoding is unsuccessful
188      */
189     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
190         if (bytes == null) {
191             return null;
192         }
193         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
194         for (int i = 0; i < bytes.length; i++) {
195             int b = bytes[i];
196             if (b == ESCAPE_CHAR) {
197                 try {
198                     int u = Character.digit((char) bytes[++i], 16);
199                     int l = Character.digit((char) bytes[++i], 16);
200                     if (u == -1 || l == -1) {
201                         throw new DecoderException("Invalid quoted-printable encoding");
202                     }
203                     buffer.write((char) ((u << 4) + l));
204                 } catch (ArrayIndexOutOfBoundsException e) {
205                     throw new DecoderException("Invalid quoted-printable encoding");
206                 }
207             } else {
208                 buffer.write(b);
209             }
210         }
211         return buffer.toByteArray();
212     }
213 
214     /**
215      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
216      * 
217      * <p>
218      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
219      * RFC 1521 and is suitable for encoding binary data and unformatted text.
220      * </p>
221      * 
222      * @param bytes
223      *                  array of bytes to be encoded
224      * @return array of bytes containing quoted-printable data
225      */
226     public byte[] encode(byte[] bytes) {
227         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
228     }
229 
230     /**
231      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
232      * back to their original representation.
233      * 
234      * <p>
235      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
236      * RFC 1521.
237      * </p>
238      * 
239      * @param bytes
240      *                  array of quoted-printable characters
241      * @return array of original bytes
242      * @throws DecoderException
243      *                  Thrown if quoted-printable decoding is unsuccessful
244      */
245     public byte[] decode(byte[] bytes) throws DecoderException {
246         return decodeQuotedPrintable(bytes);
247     }
248 
249     /**
250      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
251      * 
252      * <p>
253      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
254      * RFC 1521 and is suitable for encoding binary data.
255      * </p>
256      * 
257      * @param pString
258      *                  string to convert to quoted-printable form
259      * @return quoted-printable string
260      * 
261      * @throws EncoderException
262      *                  Thrown if quoted-printable encoding is unsuccessful
263      * 
264      * @see #getDefaultCharset()
265      */
266     public String encode(String pString) throws EncoderException {
267         if (pString == null) {
268             return null;
269         }
270         try {
271             return encode(pString, getDefaultCharset());
272         } catch (UnsupportedEncodingException e) {
273             throw new EncoderException(e.getMessage());
274         }
275     }
276 
277     /**
278      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
279      * are converted back to their original representation.
280      * 
281      * @param pString
282      *                  quoted-printable string to convert into its original form
283      * @param charset
284      *                  the original string charset
285      * @return original string
286      * @throws DecoderException
287      *                  Thrown if quoted-printable decoding is unsuccessful
288      * @throws UnsupportedEncodingException
289      *                  Thrown if charset is not supported
290      */
291     public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
292         if (pString == null) {
293             return null;
294         }
295         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
296     }
297 
298     /**
299      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
300      * converted back to their original representation.
301      * 
302      * @param pString
303      *                  quoted-printable string to convert into its original form
304      * @return original string
305      * @throws DecoderException
306      *                  Thrown if quoted-printable decoding is unsuccessful
307      * @throws UnsupportedEncodingException
308      *                  Thrown if charset is not supported
309      * @see #getDefaultCharset()
310      */
311     public String decode(String pString) throws DecoderException {
312         if (pString == null) {
313             return null;
314         }
315         try {
316             return decode(pString, getDefaultCharset());
317         } catch (UnsupportedEncodingException e) {
318             throw new DecoderException(e.getMessage());
319         }
320     }
321 
322     /**
323      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
324      * 
325      * @param pObject
326      *                  string to convert to a quoted-printable form
327      * @return quoted-printable object
328      * @throws EncoderException
329      *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
330      *                  unsuccessful
331      */
332     public Object encode(Object pObject) throws EncoderException {
333         if (pObject == null) {
334             return null;
335         } else if (pObject instanceof byte[]) {
336             return encode((byte[]) pObject);
337         } else if (pObject instanceof String) {
338             return encode((String) pObject);
339         } else {
340             throw new EncoderException("Objects of type "
341                 + pObject.getClass().getName()
342                 + " cannot be quoted-printable encoded");
343         }
344     }
345 
346     /**
347      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
348      * representation.
349      * 
350      * @param pObject
351      *                  quoted-printable object to convert into its original form
352      * @return original object
353      * @throws DecoderException
354      *                  Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
355      *                  unsuccessful
356      */
357     public Object decode(Object pObject) throws DecoderException {
358         if (pObject == null) {
359             return null;
360         } else if (pObject instanceof byte[]) {
361             return decode((byte[]) pObject);
362         } else if (pObject instanceof String) {
363             return decode((String) pObject);
364         } else {
365             throw new DecoderException("Objects of type "
366                 + pObject.getClass().getName()
367                 + " cannot be quoted-printable decoded");
368         }
369     }
370 
371     /**
372      * Returns the default charset used for string decoding and encoding.
373      * 
374      * @return the default string charset.
375      */
376     public String getDefaultCharset() {
377         return this.charset;
378     }
379 
380     /**
381      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
382      * 
383      * <p>
384      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
385      * RFC 1521 and is suitable for encoding binary data and unformatted text.
386      * </p>
387      * 
388      * @param pString
389      *                  string to convert to quoted-printable form
390      * @param charset
391      *                  the charset for pString
392      * @return quoted-printable string
393      * 
394      * @throws UnsupportedEncodingException
395      *                  Thrown if the charset is not supported
396      */
397     public String encode(String pString, String charset) throws UnsupportedEncodingException {
398         if (pString == null) {
399             return null;
400         }
401         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
402     }
403 }