View Javadoc
1   /*******************************************************************************
2    * Copyright 2012 Internet2
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   ******************************************************************************/
16  /*
17   * Licensed to the Apache Software Foundation (ASF) under one or more
18   * contributor license agreements.  See the NOTICE file distributed with
19   * this work for additional information regarding copyright ownership.
20   * The ASF licenses this file to You under the Apache License, Version 2.0
21   * (the "License"); you may not use this file except in compliance with
22   * the License.  You may obtain a copy of the License at
23   *
24   *      http://www.apache.org/licenses/LICENSE-2.0
25   *
26   * Unless required by applicable law or agreed to in writing, software
27   * distributed under the License is distributed on an "AS IS" BASIS,
28   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29   * See the License for the specific language governing permissions and
30   * limitations under the License.
31   */
32  package edu.internet2.middleware.grouperActivemqExt.org.apache.commons.jexl2.parser;
33  
/**
 * Common constant strings utilities.
 * <p>
 * The methods of this class read JEXL string literals and handle escaping through the
 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
 * and double quotes) and read Unicode hexadecimal encoded characters.
 * </p>
 * <p>
 * The only escapable characters are the single and double quotes - ''' and '"' -,
 * a Unicode sequence starting with 'u' followed by 4 hexadecimal digits (0-9, a-f, A-F) and
 * the backslash character - '\' - itself.
 * </p>
 * <p>
 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
 * sequence output being the same as the input.
 * </p>
 */
public class StringParser {
    /** The length of an escaped unicode sequence. */
    private static final int UCHAR_LEN = 4;
    /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
    private static final int SHIFT = 12;
    /** The base 10 offset used to convert hexa characters to decimal. */
    private static final int BASE10 = 10;
    /** The last 7bits ascii character. */
    private static final char LAST_ASCII = 127;
    /** The first printable 7bits ascii character. */
    private static final char FIRST_ASCII = 32;

    /** Default constructor.  */
    public StringParser() {
    }

    /**
     * Builds a string, handles escaping through '\' syntax.
     * <p>
     * When {@code eatsep} is true, the first character is taken as the separator and both the
     * first and the last characters of {@code str} are skipped.
     * </p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildString(CharSequence str, boolean eatsep) {
        StringBuilder strb = new StringBuilder(str.length());
        char sep = eatsep ? str.charAt(0) : 0;
        int end = str.length() - (eatsep ? 1 : 0);
        int begin = (eatsep ? 1 : 0);
        read(strb, str, begin, end, sep);
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin
     */
    public static int readString(StringBuilder strb, CharSequence str, int index, char sep) {
        return read(strb, str, index, str.length(), sep);
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the relative offset in str to begin reading
     * @param end the relative offset in str to end reading
     * @param sep the separator, single or double quote, marking end of string;
     *            0 means no separator, in which case both quote kinds are escapable
     * @return the last character offset handled in origin
     */
    private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            char c = str.charAt(index);
            if (escape) {
                // the 4 digits sit at index+1..index+4 and must all lie before 'end'
                if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
                    index += UCHAR_LEN;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it
                    boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                    if (notSeparator && c != '\\') {
                        strb.append('\\');
                    }
                    strb.append(c);
                }
                escape = false;
                continue;
            }
            if (c == '\\') {
                escape = true;
                continue;
            }
            strb.append(c);
            // an unescaped separator is copied and ends the read
            if (c == sep) {
                break;
            }
        }
        return index;
    }

    /**
     * Converts one hexadecimal digit character to its numeric value.
     * @param c the character to convert
     * @return the value in 0..15, or -1 if c is not one of 0-9, a-f, A-F
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + BASE10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + BASE10;
        }
        return -1;
    }

    /**
     * Reads a Unicode escape character.
     * <p>
     * Nothing is written to the builder unless all 4 characters are valid hexadecimal digits.
     * </p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (after the '\\u')
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) {
        char xc = 0;
        int bits = SHIFT;
        for (int offset = 0; offset < UCHAR_LEN; ++offset) {
            int value = hexValue(str.charAt(begin + offset));
            if (value < 0) {
                return 0;
            }
            // most significant nibble first
            xc |= value << bits;
            bits -= UCHAR_LEN;
        }
        strb.append(xc);
        return UCHAR_LEN;
    }

    /**
     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
     * <p>
     * The result is wrapped in {@code delim}. NUL characters are dropped. Backspace, tab,
     * newline, form-feed and carriage-return are written as {@code \b}, {@code \t},
     * {@code \n}, {@code \f} and {@code \r}; note that {@link #buildString} does not decode
     * those five sequences back. Single quotes, double quotes and backslashes are always
     * backslash-escaped, whatever the delimiter is. Any other character outside the 32..127
     * range is written as a 4 digit {@code \}{@code uXXXX} sequence.
     * </p>
     * @param str the string to escape
     * @param delim the delimiter character written before and after the escaped content
     * @return the escaped representation, or null if str is null
     */
    public static String escapeString(String str, char delim) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuilder strb = new StringBuilder(length + 2);
        strb.append(delim);
        for (int i = 0; i < length; ++i) {
            char c = str.charAt(i);
            switch (c) {
                case 0:
                    continue;
                case '\b':
                    strb.append("\\b");
                    break;
                case '\t':
                    strb.append("\\t");
                    break;
                case '\n':
                    strb.append("\\n");
                    break;
                case '\f':
                    strb.append("\\f");
                    break;
                case '\r':
                    strb.append("\\r");
                    break;
                case '\"':
                    strb.append("\\\"");
                    break;
                case '\'':
                    strb.append("\\\'");
                    break;
                case '\\':
                    strb.append("\\\\");
                    break;
                default:
                    if (c >= FIRST_ASCII && c <= LAST_ASCII) {
                        strb.append(c);
                    } else {
                        // convert to Unicode escape sequence, left-padded with zeros to 4 digits
                        strb.append('\\');
                        strb.append('u');
                        String hex = Integer.toHexString(c);
                        for (int h = hex.length(); h < UCHAR_LEN; ++h) {
                            strb.append('0');
                        }
                        strb.append(hex);
                    }
            }
        }
        strb.append(delim);
        return strb.toString();
    }
}