/**
 * Copyright 2014 Internet2
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3;

import java.io.IOException;
import java.io.Writer;

import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.AggregateTranslator;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.EntityArrays;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.LookupTranslator;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.OctalUnescaper;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.UnicodeEscaper;
import edu.internet2.middleware.grouperClientExt.org.apache.commons.lang3.text.translate.UnicodeUnescaper;

/**
 * <p>Escapes and unescapes {@code String}s for
 * Java, JavaScript, HTML, XML and CSV.</p>
 *
 * <p>Every public {@code escapeXxx}/{@code unescapeXxx} method simply delegates to the
 * matching public translator constant declared in this class.</p>
 *
 * <p>#ThreadSafe#</p>
 * @since 2.0
 * @version $Id: StringEscapeUtils.java 1148520 2011-07-19 20:53:23Z ggregory $
 */
public class StringEscapeUtils {

    /* ESCAPE TRANSLATORS */

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_JAVA =
          new LookupTranslator(
            new String[][] {
              {"\"", "\\\""},
              {"\\", "\\\\"},
          }).with(
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
          ).with(
            UnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"'", "\\'"},
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            UnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping XML.
     *
     * While {@link #escapeXml(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE())
        );

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
        );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
        );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

    // TODO: Create a parent class - 'SinglePassTranslator' ?
    //       It would handle the index checking + length returning,
    //       and could also have an optimization check method.
    /**
     * Whole-input translator that quotes a single CSV column value when it contains
     * a comma, double quote, CR or LF, doubling any embedded double quotes.
     */
    static class CsvEscaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS =
            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

        /**
         * Escapes the complete {@code input} in one pass.
         *
         * @param input the CSV column value to escape
         * @param index must be {@code 0}; the whole input is consumed by the first call
         * @param out the writer receiving the escaped value
         * @return {@code input.length()}, i.e. the whole input is reported as consumed
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(CharSequence input, int index, Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
            }

            final String value = input.toString();
            if (StringUtils.containsNone(value, CSV_SEARCH_CHARS)) {
                // nothing special in the value: emit it verbatim
                out.write(value);
            } else {
                // wrap in quotes and double every embedded quote
                out.write(CSV_QUOTE);
                out.write(StringUtils.replace(value, CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
                out.write(CSV_QUOTE);
            }
            return input.length();
        }
    }

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
        new AggregateTranslator(
            new OctalUnescaper(),     // .between('\1', '\377'),
            new UnicodeUnescaper(),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
            new LookupTranslator(
                      new String[][] {
                            {"\\\\", "\\"},
                            {"\\\"", "\""},
                            {"\\'", "'"},
                            {"\\", ""}
                      })
        );

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

    /**
     * Whole-input translator that reverses {@link CsvEscaper}: it strips the enclosing
     * double quotes of a CSV column value and collapses doubled quotes, but only when
     * the quoted content contains a comma, double quote, CR or LF.
     */
    static class CsvUnescaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS =
            new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

        /**
         * Unescapes the complete {@code input} in one pass.
         *
         * <p>Input shorter than two characters (empty, or a lone double quote) cannot be
         * enclosed in a pair of quotes and is written through unchanged.</p>
         *
         * @param input the CSV column value to unescape
         * @param index must be {@code 0}; the whole input is consumed by the first call
         * @param out the writer receiving the unescaped value
         * @return {@code input.length()}, i.e. the whole input is reported as consumed
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(CharSequence input, int index, Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
            }

            final int length = input.length();

            // The length guard must come first: charAt(0) fails on empty input, and a lone
            // quote would satisfy both quote checks and then break subSequence(1, 0).
            if (length < 2 || input.charAt(0) != CSV_QUOTE || input.charAt(length - 1) != CSV_QUOTE) {
                out.write(input.toString());
                return length;
            }

            // strip quotes
            String quoteless = input.subSequence(1, length - 1).toString();

            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                // deal with escaped quotes; ie) ""
                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
            } else {
                // quoted but nothing inside needed quoting: leave the value as it was given
                out.write(input.toString());
            }
            return length;
        }
    }

    /* Helper functions */

    /**
     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
     * standard programming.</p>
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
      super();
    }

    // Java and JavaScript
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeEcmaScript(String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * <p>Unescapes any Java literals found in the {@code String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJava(String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeEcmaScript(String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    // HTML and XML
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character ({@code &apos;})
     * is not a legal entity and so is not supported.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     *
     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     *
     * @since 3.0
     */
    public static final String escapeHtml4(String input) {
        return ESCAPE_HTML4.translate(input);
    }

    /**
     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
     * <p>Supports only the HTML 3.0 entities. </p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeHtml3(String input) {
        return ESCAPE_HTML3.translate(input);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.</p>
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml4(String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml3(String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a {@code String} using XML entities.</p>
     *
     * <p>For example: {@code "bread" & "butter"} becomes
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
     *    escaped. If you still wish this functionality, you can achieve it
     *    via the following:
     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     * @see #unescapeXml(java.lang.String)
     */
    public static final String escapeXml(String input) {
        return ESCAPE_XML.translate(input);
    }


    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.</p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases. </p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     * @see #escapeXml(String)
     */
    public static final String unescapeXml(String input) {
        return UNESCAPE_XML.translate(input);
    }


    //-----------------------------------------------------------------------

    /**
     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
     * if required.</p>
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     * @see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a>
     * @see <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>
     * @since 2.4
     */
    public static final String escapeCsv(String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote. </p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.
     *    A value consisting of a single double quote character is not considered
     *    enclosed in quotes and is also returned unchanged.</p>
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     * @see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a>
     * @see <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>
     * @since 2.4
     */
    public static final String unescapeCsv(String input) {
        return UNESCAPE_CSV.translate(input);
    }

}