/*******************************************************************************
 * Copyright 2012 Internet2
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Common constant strings utilities.
 * <p>
 * This package methods read JEXL string literals and handle escaping through the
 * 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
 * and double quotes) and read Unicode hexadecimal encoded characters.
 * </p>
 * <p>
 * The only escapable characters are the single and double quotes - ''' and '"' -,
 * a Unicode sequence starting with 'u' followed by 4 hexadecimals and
 * the backslash character - '\' - itself.
 * </p>
 * <p>
 * A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
 * sequence output being the same as the input.
 * </p>
 */
public class StringParser {
    /** The length of an escaped unicode sequence (number of hexadecimal digits). */
    private static final int UCHAR_LEN = 4;
    /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
    private static final int SHIFT = 12;
    /** The base 10 offset used to convert hexa characters to decimal. */
    private static final int BASE10 = 10;
    /** The last 7bits ascii character. */
    private static final char LAST_ASCII = 127;
    /** The first printable 7bits ascii character. */
    private static final char FIRST_ASCII = 32;

    /** Default constructor. */
    public StringParser() {
    }

    /**
     * Builds a string, handles escaping through '\' syntax.
     * <p>
     * When {@code eatsep} is true, the first character of {@code str} is taken as the
     * separator and both the first and the last character of {@code str} are left out
     * of the result; {@code str} must then hold at least one character.
     * </p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildString(CharSequence str, boolean eatsep) {
        StringBuilder strb = new StringBuilder(str.length());
        char sep = eatsep ? str.charAt(0) : 0;
        int end = str.length() - (eatsep ? 1 : 0);
        int begin = (eatsep ? 1 : 0);
        read(strb, str, begin, end, sep);
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>
     * The unescaped separator that stops the read is itself copied into {@code strb}.
     * </p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin
     */
    public static int readString(StringBuilder strb, CharSequence str, int index, char sep) {
        return read(strb, str, index, str.length(), sep);
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the relative offset in str to begin reading
     * @param end the relative offset in str to end reading
     * @param sep the separator, single or double quote, marking end of string;
     *            0 means no separator, in which case both quote kinds are escapable
     * @return the last character offset handled in origin
     */
    private static int read(StringBuilder strb, CharSequence str, int begin, int end, char sep) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            char c = str.charAt(index);
            if (escape) {
                // a 'u' needs its 4 hexadecimal digits to fit before 'end' to be decoded
                if (c == 'u' && (index + UCHAR_LEN) < end && readUnicodeChar(strb, str, index + 1) > 0) {
                    index += UCHAR_LEN;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it
                    boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                    if (notSeparator && c != '\\') {
                        strb.append('\\');
                    }
                    strb.append(c);
                }
                escape = false;
                continue;
            }
            if (c == '\\') {
                escape = true;
                continue;
            }
            strb.append(c);
            if (c == sep) {
                break;
            }
        }
        return index;
    }

    /**
     * Reads a Unicode escape character.
     * <p>
     * Exactly 4 characters are examined; each must be a hexadecimal digit
     * (0-9, a-f or A-F). Nothing is written to the builder when any of them is not.
     * </p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (just after the backslash and 'u' prefix)
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(StringBuilder strb, CharSequence str, int begin) {
        char xc = 0;
        int bits = SHIFT;
        for (int offset = 0; offset < UCHAR_LEN; ++offset) {
            char c = str.charAt(begin + offset);
            int value;
            if (c >= '0' && c <= '9') {
                value = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                value = c - 'a' + BASE10;
            } else if (c >= 'A' && c <= 'F') {
                value = c - 'A' + BASE10;
            } else {
                // not a hexadecimal digit: the escape is invalid as a whole
                return 0;
            }
            xc |= value << bits;
            // each hexadecimal digit contributes one nibble, i.e. 4 bits
            bits -= UCHAR_LEN;
        }
        strb.append(xc);
        return UCHAR_LEN;
    }

    /**
     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
     * <p>
     * The result is enclosed between two {@code delim} characters. NUL characters are dropped,
     * backspace, tab, newline, form-feed and carriage-return are written as their usual
     * backslash-letter forms, and single quote, double quote and backslash are each preceded
     * by a backslash whatever {@code delim} is. Note that {@link #buildString} and
     * {@link #readString} only decode quotes, backslash and Unicode escapes; the
     * backslash-letter control escapes written here are passed through unchanged by them.
     * </p>
     * @param str the string to escape
     * @param delim the delimiter character written before and after the escaped content
     * @return the escaped representation, or null if {@code str} is null
     */
    public static String escapeString(String str, char delim) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuilder strb = new StringBuilder(length + 2);
        strb.append(delim);
        for (int i = 0; i < length; ++i) {
            char c = str.charAt(i);
            switch (c) {
                case 0:
                    continue;
                case '\b':
                    strb.append("\\b");
                    break;
                case '\t':
                    strb.append("\\t");
                    break;
                case '\n':
                    strb.append("\\n");
                    break;
                case '\f':
                    strb.append("\\f");
                    break;
                case '\r':
                    strb.append("\\r");
                    break;
                case '\"':
                    strb.append("\\\"");
                    break;
                case '\'':
                    strb.append("\\\'");
                    break;
                case '\\':
                    strb.append("\\\\");
                    break;
                default:
                    if (c >= FIRST_ASCII && c <= LAST_ASCII) {
                        strb.append(c);
                    } else {
                        // convert to Unicode escape sequence, left-padded to 4 hexadecimal digits
                        strb.append('\\');
                        strb.append('u');
                        String hex = Integer.toHexString(c);
                        for (int h = hex.length(); h < UCHAR_LEN; ++h) {
                            strb.append('0');
                        }
                        strb.append(hex);
                    }
            }
        }
        strb.append(delim);
        return strb.toString();
    }
}