001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.oozie.util; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.StringReader; 025import java.io.StringWriter; 026import java.text.CharacterIterator; 027import java.text.StringCharacterIterator; 028import java.util.Enumeration; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032import java.util.Properties; 033 034import javax.xml.XMLConstants; 035import javax.xml.parsers.DocumentBuilder; 036import javax.xml.parsers.DocumentBuilderFactory; 037import javax.xml.parsers.ParserConfigurationException; 038import javax.xml.transform.Result; 039import javax.xml.transform.Source; 040import javax.xml.transform.Transformer; 041import javax.xml.transform.TransformerFactory; 042import javax.xml.transform.dom.DOMSource; 043import javax.xml.transform.stream.StreamResult; 044import javax.xml.transform.stream.StreamSource; 045import javax.xml.validation.Schema; 046import javax.xml.validation.SchemaFactory; 047import javax.xml.validation.Validator; 048 049import org.apache.hadoop.conf.Configuration; 050import org.apache.oozie.service.SchemaService; 051import org.apache.oozie.service.SchemaService.SchemaName; 052import org.apache.oozie.service.Services; 053import org.jdom.Comment; 054import org.jdom.Document; 055import org.jdom.Element; 056import org.jdom.JDOMException; 057import org.jdom.Namespace; 058import org.jdom.input.SAXBuilder; 059import org.jdom.output.Format; 060import org.jdom.output.XMLOutputter; 061import org.xml.sax.SAXException; 062 063/** 064 * XML utility methods. 065 */ 066public class XmlUtils { 067 068 private static SAXBuilder createSAXBuilder() { 069 SAXBuilder saxBuilder = new SAXBuilder(); 070 saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); 071 saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); 072 saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 073 saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 074 return saxBuilder; 075 } 076 077 /** 078 * Remove comments from any Xml String. 079 * 080 * @param xmlStr XML string to remove comments. 081 * @return String after removing comments. 082 * @throws JDOMException thrown if an error happend while XML parsing. 083 */ 084 public static String removeComments(String xmlStr) throws JDOMException { 085 if (xmlStr == null) { 086 return null; 087 } 088 try { 089 SAXBuilder saxBuilder = createSAXBuilder(); 090 Document document = saxBuilder.build(new StringReader(xmlStr)); 091 removeComments(document); 092 return prettyPrint(document.getRootElement()).toString(); 093 } 094 catch (IOException ex) { 095 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 096 } 097 } 098 099 private static void removeComments(List l) { 100 for (Iterator i = l.iterator(); i.hasNext();) { 101 Object node = i.next(); 102 if (node instanceof Comment) { 103 i.remove(); 104 } 105 else { 106 if (node instanceof Element) { 107 removeComments(((Element) node).getContent()); 108 } 109 } 110 } 111 } 112 113 private static void removeComments(Document doc) { 114 removeComments(doc.getContent()); 115 } 116 117 /** 118 * Parse a string assuming it is a valid XML document and return an JDOM Element for it. 119 * 120 * @param xmlStr XML string to parse. 121 * @return JDOM element for the parsed XML string. 122 * @throws JDOMException thrown if an error happend while XML parsing. 123 */ 124 public static Element parseXml(String xmlStr) throws JDOMException { 125 ParamChecker.notNull(xmlStr, "xmlStr"); 126 try { 127 SAXBuilder saxBuilder = createSAXBuilder(); 128 Document document = saxBuilder.build(new StringReader(xmlStr)); 129 return document.getRootElement(); 130 } 131 catch (IOException ex) { 132 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 133 } 134 } 135 136 /** 137 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it. 138 * 139 * @param is inputstream to parse. 140 * @return JDOM element for the parsed XML string. 141 * @throws JDOMException thrown if an error happend while XML parsing. 142 * @throws IOException thrown if an IO error occurred. 143 */ 144 public static Element parseXml(InputStream is) throws JDOMException, IOException { 145 ParamChecker.notNull(is, "is"); 146 SAXBuilder saxBuilder = createSAXBuilder(); 147 Document document = saxBuilder.build(is); 148 return document.getRootElement(); 149 } 150 151 /** 152 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML 153 * document. 154 * 155 * @param filePath path of the XML document. 156 * @param attributeName attribute to retrieve value for. 157 * @return value of the specified attribute. 158 */ 159 public static String getRootAttribute(String filePath, String attributeName) { 160 ParamChecker.notNull(filePath, "filePath"); 161 ParamChecker.notNull(attributeName, "attributeName"); 162 SAXBuilder saxBuilder = createSAXBuilder(); 163 try { 164 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath)); 165 return doc.getRootElement().getAttributeValue(attributeName); 166 } 167 catch (JDOMException e) { 168 throw new RuntimeException(); 169 } 170 catch (IOException e) { 171 throw new RuntimeException(); 172 } 173 } 174 175 /** 176 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the 177 * {@link #toString} method is invoked. 178 */ 179 public static class PrettyPrint { 180 private String str; 181 private Element element; 182 183 private PrettyPrint(String str) { 184 this.str = str; 185 } 186 187 private PrettyPrint(Element element) { 188 this.element = ParamChecker.notNull(element, "element"); 189 } 190 191 /** 192 * Return the pretty print representation of an XML document. 193 * 194 * @return the pretty print representation of an XML document. 195 */ 196 @Override 197 public String toString() { 198 if (str != null) { 199 return str; 200 } 201 else { 202 XMLOutputter outputter = new XMLOutputter(); 203 StringWriter stringWriter = new StringWriter(); 204 outputter.setFormat(Format.getPrettyFormat()); 205 try { 206 outputter.output(element, stringWriter); 207 } 208 catch (Exception ex) { 209 throw new RuntimeException(ex); 210 } 211 return stringWriter.toString(); 212 } 213 } 214 } 215 216 /** 217 * Return a pretty print string for a JDOM Element. 218 * 219 * @param element JDOM element. 220 * @return pretty print of the given JDOM Element. 221 */ 222 public static PrettyPrint prettyPrint(Element element) { 223 return new PrettyPrint(element); 224 225 } 226 227 /** 228 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original 229 * string. 230 * 231 * @param xmlStr XML string. 232 * @return prettyprint of the given XML string or the original string if the given string is not valid XML. 233 */ 234 public static PrettyPrint prettyPrint(String xmlStr) { 235 try { 236 return new PrettyPrint(parseXml(xmlStr)); 237 } 238 catch (Exception e) { 239 return new PrettyPrint(xmlStr); 240 } 241 } 242 243 /** 244 * Return a pretty print string for a Configuration object. 245 * 246 * @param conf Configuration object. 247 * @return prettyprint of the given Configuration object. 248 */ 249 public static PrettyPrint prettyPrint(Configuration conf) { 250 Element root = new Element("configuration"); 251 for (Map.Entry<String, String> entry : conf) { 252 Element property = new Element("property"); 253 Element name = new Element("name"); 254 name.setText(entry.getKey()); 255 Element value = new Element("value"); 256 value.setText(entry.getValue()); 257 property.addContent(name); 258 property.addContent(value); 259 root.addContent(property); 260 } 261 return new PrettyPrint(root); 262 } 263 264 /** 265 * Schema validation for a given xml. <p> 266 * 267 * @param schema for validation 268 * @param xml to be validated 269 */ 270 public static void validateXml(Schema schema, String xml) throws SAXException, IOException { 271 Validator validator = SchemaService.getValidator(schema); 272 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes()))); 273 } 274 275 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException { 276 if (xmlData == null || xmlData.length() == 0) { 277 return; 278 } 279 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile); 280 validateXml(schema, xmlData); 281 } 282 283 /** 284 * Convert Properties to string 285 * 286 * @param props 287 * @return xml string 288 * @throws IOException 289 */ 290 public static String writePropToString(Properties props) throws IOException { 291 try { 292 org.w3c.dom.Document doc = getDocumentBuilder().newDocument(); 293 org.w3c.dom.Element conf = doc.createElement("configuration"); 294 doc.appendChild(conf); 295 conf.appendChild(doc.createTextNode("\n")); 296 for (Enumeration e = props.keys(); e.hasMoreElements();) { 297 String name = (String) e.nextElement(); 298 Object object = props.get(name); 299 String value; 300 if (object instanceof String) { 301 value = (String) object; 302 } 303 else { 304 continue; 305 } 306 org.w3c.dom.Element propNode = doc.createElement("property"); 307 conf.appendChild(propNode); 308 309 org.w3c.dom.Element nameNode = doc.createElement("name"); 310 nameNode.appendChild(doc.createTextNode(name.trim())); 311 propNode.appendChild(nameNode); 312 313 org.w3c.dom.Element valueNode = doc.createElement("value"); 314 valueNode.appendChild(doc.createTextNode(value.trim())); 315 propNode.appendChild(valueNode); 316 317 conf.appendChild(doc.createTextNode("\n")); 318 } 319 320 Source source = new DOMSource(doc); 321 StringWriter stringWriter = new StringWriter(); 322 Result result = new StreamResult(stringWriter); 323 TransformerFactory factory = TransformerFactory.newInstance(); 324 factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true); 325 Transformer transformer = factory.newTransformer(); 326 transformer.transform(source, result); 327 328 return stringWriter.getBuffer().toString(); 329 } 330 catch (Exception e) { 331 throw new IOException(e); 332 } 333 } 334 335 /** 336 * Returns a DocumentBuilder 337 * @return DocumentBuilder 338 * @throws ParserConfigurationException 339 */ 340 private static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException { 341 DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); 342 docBuilderFactory.setNamespaceAware(true); 343 docBuilderFactory.setXIncludeAware(false); 344 docBuilderFactory.setExpandEntityReferences(false); 345 docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); 346 //Redundant with disallow-doctype, but just in case 347 docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); 348 docBuilderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 349 docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 350 // ignore all comments inside the xml file 351 docBuilderFactory.setIgnoringComments(true); 352 return docBuilderFactory.newDocumentBuilder(); 353 } 354 355 /** 356 * Escape characters for text appearing as XML data, between tags. 357 * <p> 358 * The following characters are replaced with corresponding character entities : 359 * '<' to '<' 360 * '>' to '>' 361 * '&' to '&' 362 * '"' to '"' 363 * "'" to "'" 364 * <p> 365 * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method. 366 */ 367 public static String escapeCharsForXML(String aText) { 368 final StringBuilder result = new StringBuilder(); 369 final StringCharacterIterator iterator = new StringCharacterIterator(aText); 370 char character = iterator.current(); 371 while (character != CharacterIterator.DONE) { 372 if (character == '<') { 373 result.append("<"); 374 } 375 else if (character == '>') { 376 result.append(">"); 377 } 378 else if (character == '\"') { 379 result.append("""); 380 } 381 else if (character == '\'') { 382 result.append("'"); 383 } 384 else if (character == '&') { 385 result.append("&"); 386 } 387 else { 388 // the char is not a special one 389 // add it to the result as is 390 result.append(character); 391 } 392 character = iterator.next(); 393 } 394 return result.toString(); 395 } 396 397 public static Element getSLAElement(Element elem) { 398 Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI)); 399 Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2)); 400 Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1; 401 402 return eSla; 403 } 404 405}