001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 package org.apache.oozie.util; 019 020 import java.io.ByteArrayInputStream; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.StringReader; 024 import java.io.StringWriter; 025 import java.text.CharacterIterator; 026 import java.text.StringCharacterIterator; 027 import java.util.Enumeration; 028 import java.util.Iterator; 029 import java.util.List; 030 import java.util.Map; 031 import java.util.Properties; 032 033 import javax.xml.XMLConstants; 034 import javax.xml.parsers.DocumentBuilderFactory; 035 import javax.xml.transform.Result; 036 import javax.xml.transform.Source; 037 import javax.xml.transform.Transformer; 038 import javax.xml.transform.TransformerFactory; 039 import javax.xml.transform.dom.DOMSource; 040 import javax.xml.transform.stream.StreamResult; 041 import javax.xml.transform.stream.StreamSource; 042 import javax.xml.validation.Schema; 043 import javax.xml.validation.SchemaFactory; 044 import javax.xml.validation.Validator; 045 046 import org.apache.hadoop.conf.Configuration; 047 import org.apache.oozie.service.SchemaService; 048 import org.apache.oozie.service.Services; 049 import org.apache.oozie.service.SchemaService.SchemaName; 050 import org.jdom.Comment; 051 import org.jdom.Document; 052 import org.jdom.Element; 053 import org.jdom.JDOMException; 054 import org.jdom.input.SAXBuilder; 055 import org.jdom.output.Format; 056 import org.jdom.output.XMLOutputter; 057 import org.xml.sax.EntityResolver; 058 import org.xml.sax.InputSource; 059 import org.xml.sax.SAXException; 060 061 /** 062 * XML utility methods. 063 */ 064 public class XmlUtils { 065 public static final String SLA_NAME_SPACE_URI = "uri:oozie:sla:0.1"; 066 067 private static class NoExternalEntityEntityResolver implements EntityResolver { 068 069 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 070 return new InputSource(new ByteArrayInputStream(new byte[0])); 071 } 072 073 } 074 075 private static SAXBuilder createSAXBuilder() { 076 SAXBuilder saxBuilder = new SAXBuilder(); 077 078 //THIS IS NOT WORKING 079 //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); 080 081 //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities 082 saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver()); 083 return saxBuilder; 084 } 085 086 /** 087 * Remove comments from any Xml String. 088 * 089 * @param xmlStr XML string to remove comments. 090 * @return String after removing comments. 091 * @throws JDOMException thrown if an error happend while XML parsing. 092 */ 093 public static String removeComments(String xmlStr) throws JDOMException { 094 if (xmlStr == null) { 095 return null; 096 } 097 try { 098 SAXBuilder saxBuilder = createSAXBuilder(); 099 Document document = saxBuilder.build(new StringReader(xmlStr)); 100 removeComments(document); 101 return prettyPrint(document.getRootElement()).toString(); 102 } 103 catch (IOException ex) { 104 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 105 } 106 } 107 108 private static void removeComments(List l) { 109 for (Iterator i = l.iterator(); i.hasNext();) { 110 Object node = i.next(); 111 if (node instanceof Comment) { 112 i.remove(); 113 } 114 else { 115 if (node instanceof Element) { 116 removeComments(((Element) node).getContent()); 117 } 118 } 119 } 120 } 121 122 private static void removeComments(Document doc) { 123 removeComments(doc.getContent()); 124 } 125 126 /** 127 * Parse a string assuming it is a valid XML document and return an JDOM Element for it. 128 * 129 * @param xmlStr XML string to parse. 130 * @return JDOM element for the parsed XML string. 131 * @throws JDOMException thrown if an error happend while XML parsing. 132 */ 133 public static Element parseXml(String xmlStr) throws JDOMException { 134 ParamChecker.notNull(xmlStr, "xmlStr"); 135 try { 136 SAXBuilder saxBuilder = createSAXBuilder(); 137 Document document = saxBuilder.build(new StringReader(xmlStr)); 138 return document.getRootElement(); 139 } 140 catch (IOException ex) { 141 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 142 } 143 } 144 145 /** 146 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it. 147 * 148 * @param is inputstream to parse. 149 * @return JDOM element for the parsed XML string. 150 * @throws JDOMException thrown if an error happend while XML parsing. 151 * @throws IOException thrown if an IO error occurred. 152 */ 153 public static Element parseXml(InputStream is) throws JDOMException, IOException { 154 ParamChecker.notNull(is, "is"); 155 SAXBuilder saxBuilder = createSAXBuilder(); 156 Document document = saxBuilder.build(is); 157 return document.getRootElement(); 158 } 159 160 /** 161 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML 162 * document. 163 * 164 * @param filePath path of the XML document. 165 * @param attributeName attribute to retrieve value for. 166 * @return value of the specified attribute. 167 */ 168 public static String getRootAttribute(String filePath, String attributeName) { 169 ParamChecker.notNull(filePath, "filePath"); 170 ParamChecker.notNull(attributeName, "attributeName"); 171 SAXBuilder saxBuilder = createSAXBuilder(); 172 try { 173 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath)); 174 return doc.getRootElement().getAttributeValue(attributeName); 175 } 176 catch (JDOMException e) { 177 throw new RuntimeException(); 178 } 179 catch (IOException e) { 180 throw new RuntimeException(); 181 } 182 } 183 184 /** 185 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the 186 * {@link #toString} method is invoked. 187 */ 188 public static class PrettyPrint { 189 private String str; 190 private Element element; 191 192 private PrettyPrint(String str) { 193 this.str = str; 194 } 195 196 private PrettyPrint(Element element) { 197 this.element = ParamChecker.notNull(element, "element"); 198 } 199 200 /** 201 * Return the pretty print representation of an XML document. 202 * 203 * @return the pretty print representation of an XML document. 204 */ 205 @Override 206 public String toString() { 207 if (str != null) { 208 return str; 209 } 210 else { 211 XMLOutputter outputter = new XMLOutputter(); 212 StringWriter stringWriter = new StringWriter(); 213 outputter.setFormat(Format.getPrettyFormat()); 214 try { 215 outputter.output(element, stringWriter); 216 } 217 catch (Exception ex) { 218 throw new RuntimeException(ex); 219 } 220 return stringWriter.toString(); 221 } 222 } 223 } 224 225 /** 226 * Return a pretty print string for a JDOM Element. 227 * 228 * @param element JDOM element. 229 * @return pretty print of the given JDOM Element. 230 */ 231 public static PrettyPrint prettyPrint(Element element) { 232 return new PrettyPrint(element); 233 234 } 235 236 /** 237 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original 238 * string. 239 * 240 * @param xmlStr XML string. 241 * @return prettyprint of the given XML string or the original string if the given string is not valid XML. 242 */ 243 public static PrettyPrint prettyPrint(String xmlStr) { 244 try { 245 return new PrettyPrint(parseXml(xmlStr)); 246 } 247 catch (Exception e) { 248 return new PrettyPrint(xmlStr); 249 } 250 } 251 252 /** 253 * Return a pretty print string for a Configuration object. 254 * 255 * @param conf Configuration object. 256 * @return prettyprint of the given Configuration object. 257 */ 258 public static PrettyPrint prettyPrint(Configuration conf) { 259 Element root = new Element("configuration"); 260 for (Map.Entry<String, String> entry : conf) { 261 Element property = new Element("property"); 262 Element name = new Element("name"); 263 name.setText(entry.getKey()); 264 Element value = new Element("value"); 265 value.setText(entry.getValue()); 266 property.addContent(name); 267 property.addContent(value); 268 root.addContent(property); 269 } 270 return new PrettyPrint(root); 271 } 272 273 /** 274 * Schema validation for a given xml. <p/> 275 * 276 * @param schema for validation 277 * @param xml to be validated 278 */ 279 public static void validateXml(Schema schema, String xml) throws SAXException, IOException { 280 281 Validator validator = schema.newValidator(); 282 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes()))); 283 } 284 285 /** 286 * Create schema object for the given xsd 287 * 288 * @param is inputstream to schema. 289 * @return the schema object. 290 */ 291 public static Schema createSchema(InputStream is) { 292 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); 293 StreamSource src = new StreamSource(is); 294 try { 295 return factory.newSchema(src); 296 } 297 catch (SAXException e) { 298 throw new RuntimeException(e.getMessage(), e); 299 } 300 } 301 302 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException { 303 if (xmlData == null || xmlData.length() == 0) { 304 return; 305 } 306 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile); 307 validateXml(schema, xmlData); 308 } 309 310 /** 311 * Convert Properties to string 312 * 313 * @param props 314 * @return xml string 315 * @throws IOException 316 */ 317 public static String writePropToString(Properties props) throws IOException { 318 try { 319 org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 320 org.w3c.dom.Element conf = doc.createElement("configuration"); 321 doc.appendChild(conf); 322 conf.appendChild(doc.createTextNode("\n")); 323 for (Enumeration e = props.keys(); e.hasMoreElements();) { 324 String name = (String) e.nextElement(); 325 Object object = props.get(name); 326 String value; 327 if (object instanceof String) { 328 value = (String) object; 329 } 330 else { 331 continue; 332 } 333 org.w3c.dom.Element propNode = doc.createElement("property"); 334 conf.appendChild(propNode); 335 336 org.w3c.dom.Element nameNode = doc.createElement("name"); 337 nameNode.appendChild(doc.createTextNode(name.trim())); 338 propNode.appendChild(nameNode); 339 340 org.w3c.dom.Element valueNode = doc.createElement("value"); 341 valueNode.appendChild(doc.createTextNode(value.trim())); 342 propNode.appendChild(valueNode); 343 344 conf.appendChild(doc.createTextNode("\n")); 345 } 346 347 Source source = new DOMSource(doc); 348 StringWriter stringWriter = new StringWriter(); 349 Result result = new StreamResult(stringWriter); 350 TransformerFactory factory = TransformerFactory.newInstance(); 351 Transformer transformer = factory.newTransformer(); 352 transformer.transform(source, result); 353 354 return stringWriter.getBuffer().toString(); 355 } 356 catch (Exception e) { 357 throw new IOException(e); 358 } 359 } 360 361 /** 362 * Escape characters for text appearing as XML data, between tags. 363 * <P/> 364 * The following characters are replaced with corresponding character entities : 365 * '<' to '<'; 366 * '>' to '>'; 367 * '&' to '&' 368 * '"' to '"' 369 * "'" to "'" 370 * <P/> 371 * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method. 372 */ 373 public static String escapeCharsForXML(String aText) { 374 final StringBuilder result = new StringBuilder(); 375 final StringCharacterIterator iterator = new StringCharacterIterator(aText); 376 char character = iterator.current(); 377 while (character != CharacterIterator.DONE) { 378 if (character == '<') { 379 result.append("<"); 380 } 381 else if (character == '>') { 382 result.append(">"); 383 } 384 else if (character == '\"') { 385 result.append("""); 386 } 387 else if (character == '\'') { 388 result.append("'"); 389 } 390 else if (character == '&') { 391 result.append("&"); 392 } 393 else { 394 // the char is not a special one 395 // add it to the result as is 396 result.append(character); 397 } 398 character = iterator.next(); 399 } 400 return result.toString(); 401 } 402 403 }