View Javadoc
1   /*******************************************************************************
2    * Copyright 2012 Internet2
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   ******************************************************************************/
16  /*
17   * @author mchyzer
18   * $Id: XmlIndenter.java,v 1.1 2008-11-30 10:57:27 mchyzer Exp $
19   */
20  package edu.internet2.middleware.grouperInstaller.util;
21  
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  
/**
 * Indents xml.  Assumes the input is not yet indented.  This is only for
 * testing, logging or documentation purposes, not production.
 * <p>
 * This is a simple character scanner, not an xml parser.  A tag runs from a
 * '&lt;' to the next '&gt;', so a '&gt;' inside a comment, CDATA section or 
 * attribute value ends the tag early.  Only the text of a simple text element 
 * (e.g. &lt;c&gt;hey&lt;/c&gt;) is copied to the output; any other characters
 * which sit between two tags are skipped.  Text after the last tag is kept.
 * <p>
 * If the input stops in the middle of a tag, or an opening tag is not followed
 * by any complete tag, then the rest of the input is copied to the output as is.
 * <p>
 * Usage: <code>new XmlIndenter(xml).result()</code>
 */
public class XmlIndenter {
  
  /** 
   * extracts the tag name, whitespace and a slash are allowed 
   * between the opening bracket and the name 
   */
  private static final Pattern TAG_NAME_PATTERN = 
    Pattern.compile("^<[\\s/]*([a-zA-Z_\\-0-9:\\.]+).*$", Pattern.DOTALL);
  
  /** one level of indentation */
  private static final String INDENT = "  ";
  
  /** chars to process (trimmed), or null if null was passed in */
  private String xml;
  
  /** index of the opening bracket of the current tag */
  private int startTagIndex;
  
  /** index of the closing bracket of the current tag */
  private int endTagIndex;
  
  /** current number of indents (number of times the indent is repeated after a newline) */
  private int currentNumberOfIndents;
  
  /** name of the tag we are currently on */
  private String currentTagName;
  
  /** result being built */
  private StringBuilder result;
  
  /**
   * indent the xml and get the result.  The xml is processed again on each call
   * @return the indented xml (trimmed), or null if the xml passed in was null
   * @throws RuntimeException if there is a problem processing the xml, e.g. a tag
   * without a name
   */
  public String result() {
    try {
      this.indent();
    } catch (RuntimeException re) {
      throw new RuntimeException("Problem here: " + this, re);
    }
    if (this.xml == null) {
      return null;
    }
    //the last thing printed is always a newline and indent, take that off
    return this.result.toString().trim();
  }

  /**
   * indent the string, the output goes to the result field
   */
  private void indent() {
    if (this.xml == null) {
      return;
    }
    this.result = new StringBuilder();
    this.startTagIndex = -1;
    this.endTagIndex = -1;
    this.currentTagName = null;
    this.currentNumberOfIndents = 0;
    //<a><b whatever=\"whatever\"><c>hey</c><d><e>there</e><f /><g / ><h></h></d></b></a>
    //<a>
    //  <b whatever="whatever">
    //    <c>hey</c>
    //    <d>
    //      <e>there</e>
    //      <f />
    //      <g / >
    //      <h></h>
    //    </d>
    //  </b>
    //</a>
    while(true) {
      this.startTagIndex = findStartTagIndex();
      if (this.startTagIndex == -1) {
        //cant find anything else...  make sure everything is there
        this.appendRemainder(this.endTagIndex+1);
        break;
      }
      
      int closingBracketIndex = findEndTagIndex();
      if (closingBracketIndex == -1) {
        //the tag is never terminated.  Dont store the -1 in endTagIndex since
        //the scan would start over from the beginning and never finish
        this.appendRemainder(this.startTagIndex);
        break;
      }
      this.endTagIndex = closingBracketIndex;
      
      //if XML or doctype, then just print with newline and continue
      if (ignoreTag(this.xml, this.startTagIndex, this.endTagIndex)) {
        this.printNewlineIndent(this.startTagIndex, this.endTagIndex+1);
        continue;
      }
      
      this.currentTagName = findTagName();
      
      if (selfClosedTag(this.xml, this.endTagIndex)) {
        //self closed, the indent level does not change
        this.printNewlineIndent(this.startTagIndex, this.endTagIndex+1);
      } else if (closeTag(this.xml, this.startTagIndex)) {
        //end tag, take back the last indent which was printed, and go down a level.
        //an end tag without a start tag must not take the level below zero
        this.unindent();
        if (this.currentNumberOfIndents > 0) {
          this.currentNumberOfIndents--;
        }
        this.printNewlineIndent(this.startTagIndex, this.endTagIndex+1);
        
      } else {
        //start tag, look at the next tag to see if this element only has text in it
        int nextTagStartIndex = findNextStartTagIndex(this.xml, this.endTagIndex+1);
        int nextTagEndIndex = nextTagStartIndex == -1 ? -1 
            : findNextEndTagIndex(this.xml, nextTagStartIndex+1);
        
        if (nextTagEndIndex == -1) {
          //there is no complete tag after this one, so there is nothing to 
          //compare with.  Print what is left so no text is lost
          this.appendRemainder(this.startTagIndex);
          break;
        }
        
        String nextTagName = tagName(this.xml, nextTagStartIndex, nextTagEndIndex);
        boolean isNextTagCloseTag = closeTag(this.xml, nextTagStartIndex);
        if (!textTag(this.xml, this.endTagIndex, this.currentTagName, nextTagName, isNextTagCloseTag)) {
          //the children go one level deeper
          this.currentNumberOfIndents++;
          this.printNewlineIndent(this.startTagIndex, this.endTagIndex+1);
        } else {
          //else this is a text tag, print from here to end of next tag, newline and indent
          this.printNewlineIndent(this.startTagIndex, nextTagEndIndex+1);
          //increment past the next one
          this.startTagIndex = nextTagEndIndex;
          this.endTagIndex = nextTagEndIndex;
        }
      }
    }
  }
  
  /**
   * copy the rest of the xml to the result as is
   * @param from index of the first char to copy (can be the length of the xml,
   * then nothing is copied)
   */
  private void appendRemainder(int from) {
    this.result.append(this.xml, from, this.xml.length());
  }
  
  /**
   * see if we can ignore the tag, e.g. xml header or doctype.  This is any tag where 
   * the char after the opening bracket is a question mark or exclamation point
   * @param theXml
   * @param theStartTagIndex index of the opening bracket, there must be a char after it
   * @param theEndTagIndex is not used
   * @return true if ignore
   */
  static boolean ignoreTag(String theXml, int theStartTagIndex, int theEndTagIndex) {
    char firstChar = theXml.charAt(theStartTagIndex+1);
    return firstChar == '?' || firstChar == '!';
  }
  
  /**
   * print part of the xml, and then put a newline and indent
   * @param start index of the first char to print
   * @param end index after the last char to print
   */
  private void printNewlineIndent(int start, int end) {
    this.result.append(this.xml, start, end);
    this.newlineIndent();
  }

  /**
   * put a newline and the indent for the current level
   */
  private void newlineIndent() {
    this.result.append("\n");
    for (int i=0;i<this.currentNumberOfIndents;i++) {
      this.result.append(INDENT);
    }
  }
  
  /**
   * unindent a previous indent if it is there (take up to two spaces
   * off of the end of the result)
   */
  private void unindent() {
    for (int i=0;i<INDENT.length();i++) {
      int lastIndex = this.result.length()-1;
      //nothing printed yet if the xml starts with an end tag
      if (lastIndex < 0 || this.result.charAt(lastIndex) != ' ') {
        return;
      }
      this.result.deleteCharAt(lastIndex);
    }
  }
  
  /**
   * find the current tag name
   * should support: &lt; a /&gt;
   * or &lt; / b&gt;
   * @param xml
   * @param startTagIndex index of the opening bracket
   * @param endTagIndex index of the closing bracket (or -1 if none found, 
   * then the tag goes to the end of the xml)
   * @return the current tag name
   * @throws RuntimeException if there is no name after the opening bracket
   */
  static String tagName(String xml, int startTagIndex, int endTagIndex) {
    int lastIndex = endTagIndex > startTagIndex ? endTagIndex : (xml.length()-1);
    String tag = xml.substring(startTagIndex, lastIndex+1);
    Matcher matcher = TAG_NAME_PATTERN.matcher(tag);
    if (!matcher.matches()) {
      throw new RuntimeException("Cant match tag: '" + tag + "'");
    }
    return matcher.group(1);
  }
  
  /**
   * after the last end tag, find the next start tag
   * @return the index of the next opening bracket or -1 if there is not one
   */
  private int findStartTagIndex() {
    return findNextStartTagIndex(this.xml, this.endTagIndex+1);
  }

  /**
   * find the name of the tag between the current start and end tag index
   * @return the tag name
   */
  private String findTagName() {
    return tagName(this.xml, this.startTagIndex, this.endTagIndex);
  }

  /**
   * after the current start tag index, find the next closing bracket
   * @return the index of the next closing bracket or -1 if there is not one
   */
  private int findEndTagIndex() {
    return findNextEndTagIndex(this.xml, this.startTagIndex+1);
  }

  /**
   * find the start tag (opening bracket) from xml and a start from index
   * @param xml
   * @param startFrom first index to look at
   * @return the start tag index or -1 if not found another
   */
  static int findNextStartTagIndex(String xml, int startFrom) {
    int length = xml.length();
    for (int i=startFrom;i<length;i++) {
      if (xml.charAt(i) == '<') {
        return i;
      }
    }
    return -1;
  }
  
  /**
   * find the end tag (closing bracket) from xml and a start from index
   * @param xml
   * @param startFrom first index to look at
   * @return the end tag index or -1 if not found another
   */
  static int findNextEndTagIndex(String xml, int startFrom) {
    int length = xml.length();
    for (int i=startFrom;i<length;i++) {
      if (xml.charAt(i) == '>') {
        return i;
      }
    }
    return -1;
  }
  
  /**
   * find if the tag is self closed (e.g. &lt;a /&gt;), i.e. the last char before
   * the closing bracket which is not whitespace is a slash 
   * @param xml
   * @param endTagIndex index of the closing bracket
   * @return true if self closed
   */
  static boolean selfClosedTag(String xml, int endTagIndex) {
    for (int i=endTagIndex-1;i>=0;i--) {
      char curChar = xml.charAt(i);
      //ignore whitespace
      if (Character.isWhitespace(curChar)) {
        continue;
      }
      return curChar == '/';
    }
    //shouldnt really get here...
    return false;
  }

  /**
   * find if the tag is a close tag (e.g. &lt;/a&gt;), i.e. the first char after
   * the opening bracket which is not whitespace is a slash
   * @param xml
   * @param startTagIndex index of the opening bracket
   * @return true if close tag
   */
  static boolean closeTag(String xml, int startTagIndex) {
    for (int i=startTagIndex+1;i<xml.length();i++) {
      char curChar = xml.charAt(i);
      //ignore whitespace
      if (Character.isWhitespace(curChar)) {
        continue;
      }
      return curChar == '/';
    }
    //shouldnt really get here...
    return false;
  }

  /**
   * find if the tag contains text (note, dont call this if know it is self closed,
   * though in that case it shouldnt be text anyways).  A tag is a text tag if
   * the next tag is the close tag with the same name
   * @param xml is not used
   * @param endTagIndex is not used
   * @param tagName 
   * @param nextTagName 
   * @param isNextCloseTag 
   * @return true if contains text (as opposed to other tags)
   */
  static boolean textTag(String xml, int endTagIndex, String tagName, 
      String nextTagName, boolean isNextCloseTag) {
    //null safe compare
    boolean sameName = tagName == null ? nextTagName == null : tagName.equals(nextTagName);
    return sameName && isNextCloseTag;
  }

  /**
   * @param theXml is the xml to format, it is trimmed.  If null, then 
   * the result is null
   */
  public XmlIndenter(String theXml) {
    if (theXml != null) {
      this.xml = theXml.trim();
    }
  }
  
}