001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.oozie.util; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.StringReader; 025import java.io.StringWriter; 026import java.text.CharacterIterator; 027import java.text.StringCharacterIterator; 028import java.util.Enumeration; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Map; 032import java.util.Properties; 033 034import javax.xml.XMLConstants; 035import javax.xml.parsers.DocumentBuilderFactory; 036import javax.xml.transform.Result; 037import javax.xml.transform.Source; 038import javax.xml.transform.Transformer; 039import javax.xml.transform.TransformerFactory; 040import javax.xml.transform.dom.DOMSource; 041import javax.xml.transform.stream.StreamResult; 042import javax.xml.transform.stream.StreamSource; 043import javax.xml.validation.Schema; 044import javax.xml.validation.SchemaFactory; 045import javax.xml.validation.Validator; 046 047import org.apache.hadoop.conf.Configuration; 048import org.apache.oozie.service.SchemaService; 049import org.apache.oozie.service.SchemaService.SchemaName; 050import org.apache.oozie.service.Services; 051import org.jdom.Comment; 052import org.jdom.Document; 053import org.jdom.Element; 054import org.jdom.JDOMException; 055import org.jdom.Namespace; 056import org.jdom.input.SAXBuilder; 057import org.jdom.output.Format; 058import org.jdom.output.XMLOutputter; 059import org.xml.sax.EntityResolver; 060import org.xml.sax.InputSource; 061import org.xml.sax.SAXException; 062 063/** 064 * XML utility methods. 065 */ 066public class XmlUtils { 067 068 private static class NoExternalEntityEntityResolver implements EntityResolver { 069 070 @Override 071 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 072 return new InputSource(new ByteArrayInputStream(new byte[0])); 073 } 074 075 } 076 077 private static SAXBuilder createSAXBuilder() { 078 SAXBuilder saxBuilder = new SAXBuilder(); 079 080 //THIS IS NOT WORKING 081 //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); 082 083 //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities 084 saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver()); 085 return saxBuilder; 086 } 087 088 /** 089 * Remove comments from any Xml String. 090 * 091 * @param xmlStr XML string to remove comments. 092 * @return String after removing comments. 093 * @throws JDOMException thrown if an error happend while XML parsing. 094 */ 095 public static String removeComments(String xmlStr) throws JDOMException { 096 if (xmlStr == null) { 097 return null; 098 } 099 try { 100 SAXBuilder saxBuilder = createSAXBuilder(); 101 Document document = saxBuilder.build(new StringReader(xmlStr)); 102 removeComments(document); 103 return prettyPrint(document.getRootElement()).toString(); 104 } 105 catch (IOException ex) { 106 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 107 } 108 } 109 110 private static void removeComments(List l) { 111 for (Iterator i = l.iterator(); i.hasNext();) { 112 Object node = i.next(); 113 if (node instanceof Comment) { 114 i.remove(); 115 } 116 else { 117 if (node instanceof Element) { 118 removeComments(((Element) node).getContent()); 119 } 120 } 121 } 122 } 123 124 private static void removeComments(Document doc) { 125 removeComments(doc.getContent()); 126 } 127 128 /** 129 * Parse a string assuming it is a valid XML document and return an JDOM Element for it. 130 * 131 * @param xmlStr XML string to parse. 132 * @return JDOM element for the parsed XML string. 133 * @throws JDOMException thrown if an error happend while XML parsing. 134 */ 135 public static Element parseXml(String xmlStr) throws JDOMException { 136 ParamChecker.notNull(xmlStr, "xmlStr"); 137 try { 138 SAXBuilder saxBuilder = createSAXBuilder(); 139 Document document = saxBuilder.build(new StringReader(xmlStr)); 140 return document.getRootElement(); 141 } 142 catch (IOException ex) { 143 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 144 } 145 } 146 147 /** 148 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it. 149 * 150 * @param is inputstream to parse. 151 * @return JDOM element for the parsed XML string. 152 * @throws JDOMException thrown if an error happend while XML parsing. 153 * @throws IOException thrown if an IO error occurred. 154 */ 155 public static Element parseXml(InputStream is) throws JDOMException, IOException { 156 ParamChecker.notNull(is, "is"); 157 SAXBuilder saxBuilder = createSAXBuilder(); 158 Document document = saxBuilder.build(is); 159 return document.getRootElement(); 160 } 161 162 /** 163 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML 164 * document. 165 * 166 * @param filePath path of the XML document. 167 * @param attributeName attribute to retrieve value for. 168 * @return value of the specified attribute. 169 */ 170 public static String getRootAttribute(String filePath, String attributeName) { 171 ParamChecker.notNull(filePath, "filePath"); 172 ParamChecker.notNull(attributeName, "attributeName"); 173 SAXBuilder saxBuilder = createSAXBuilder(); 174 try { 175 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath)); 176 return doc.getRootElement().getAttributeValue(attributeName); 177 } 178 catch (JDOMException e) { 179 throw new RuntimeException(); 180 } 181 catch (IOException e) { 182 throw new RuntimeException(); 183 } 184 } 185 186 /** 187 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the 188 * {@link #toString} method is invoked. 189 */ 190 public static class PrettyPrint { 191 private String str; 192 private Element element; 193 194 private PrettyPrint(String str) { 195 this.str = str; 196 } 197 198 private PrettyPrint(Element element) { 199 this.element = ParamChecker.notNull(element, "element"); 200 } 201 202 /** 203 * Return the pretty print representation of an XML document. 204 * 205 * @return the pretty print representation of an XML document. 206 */ 207 @Override 208 public String toString() { 209 if (str != null) { 210 return str; 211 } 212 else { 213 XMLOutputter outputter = new XMLOutputter(); 214 StringWriter stringWriter = new StringWriter(); 215 outputter.setFormat(Format.getPrettyFormat()); 216 try { 217 outputter.output(element, stringWriter); 218 } 219 catch (Exception ex) { 220 throw new RuntimeException(ex); 221 } 222 return stringWriter.toString(); 223 } 224 } 225 } 226 227 /** 228 * Return a pretty print string for a JDOM Element. 229 * 230 * @param element JDOM element. 231 * @return pretty print of the given JDOM Element. 232 */ 233 public static PrettyPrint prettyPrint(Element element) { 234 return new PrettyPrint(element); 235 236 } 237 238 /** 239 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original 240 * string. 241 * 242 * @param xmlStr XML string. 243 * @return prettyprint of the given XML string or the original string if the given string is not valid XML. 244 */ 245 public static PrettyPrint prettyPrint(String xmlStr) { 246 try { 247 return new PrettyPrint(parseXml(xmlStr)); 248 } 249 catch (Exception e) { 250 return new PrettyPrint(xmlStr); 251 } 252 } 253 254 /** 255 * Return a pretty print string for a Configuration object. 256 * 257 * @param conf Configuration object. 258 * @return prettyprint of the given Configuration object. 259 */ 260 public static PrettyPrint prettyPrint(Configuration conf) { 261 Element root = new Element("configuration"); 262 for (Map.Entry<String, String> entry : conf) { 263 Element property = new Element("property"); 264 Element name = new Element("name"); 265 name.setText(entry.getKey()); 266 Element value = new Element("value"); 267 value.setText(entry.getValue()); 268 property.addContent(name); 269 property.addContent(value); 270 root.addContent(property); 271 } 272 return new PrettyPrint(root); 273 } 274 275 /** 276 * Schema validation for a given xml. <p> 277 * 278 * @param schema for validation 279 * @param xml to be validated 280 * @throws SAXException in case of validation error 281 * @throws IOException in case of IO error 282 */ 283 public static void validateXml(Schema schema, String xml) throws SAXException, IOException { 284 285 Validator validator = schema.newValidator(); 286 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes()))); 287 } 288 289 /** 290 * Create schema object for the given xsd 291 * 292 * @param is inputstream to schema. 293 * @return the schema object. 294 */ 295 public static Schema createSchema(InputStream is) { 296 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); 297 StreamSource src = new StreamSource(is); 298 try { 299 return factory.newSchema(src); 300 } 301 catch (SAXException e) { 302 throw new RuntimeException(e.getMessage(), e); 303 } 304 } 305 306 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException { 307 if (xmlData == null || xmlData.length() == 0) { 308 return; 309 } 310 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile); 311 validateXml(schema, xmlData); 312 } 313 314 /** 315 * Convert Properties to string 316 * 317 * @param props the properties to convert 318 * @return xml string 319 * @throws IOException if there is an error during conversion 320 */ 321 public static String writePropToString(Properties props) throws IOException { 322 try { 323 org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 324 org.w3c.dom.Element conf = doc.createElement("configuration"); 325 doc.appendChild(conf); 326 conf.appendChild(doc.createTextNode("\n")); 327 for (Enumeration e = props.keys(); e.hasMoreElements();) { 328 String name = (String) e.nextElement(); 329 Object object = props.get(name); 330 String value; 331 if (object instanceof String) { 332 value = (String) object; 333 } 334 else { 335 continue; 336 } 337 org.w3c.dom.Element propNode = doc.createElement("property"); 338 conf.appendChild(propNode); 339 340 org.w3c.dom.Element nameNode = doc.createElement("name"); 341 nameNode.appendChild(doc.createTextNode(name.trim())); 342 propNode.appendChild(nameNode); 343 344 org.w3c.dom.Element valueNode = doc.createElement("value"); 345 valueNode.appendChild(doc.createTextNode(value.trim())); 346 propNode.appendChild(valueNode); 347 348 conf.appendChild(doc.createTextNode("\n")); 349 } 350 351 Source source = new DOMSource(doc); 352 StringWriter stringWriter = new StringWriter(); 353 Result result = new StreamResult(stringWriter); 354 TransformerFactory factory = TransformerFactory.newInstance(); 355 factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true); 356 Transformer transformer = factory.newTransformer(); 357 transformer.transform(source, result); 358 359 return stringWriter.getBuffer().toString(); 360 } 361 catch (Exception e) { 362 throw new IOException(e); 363 } 364 } 365 366 /** 367 * Escape characters for text appearing as XML data, between tags. 368 * <p> 369 * The following characters are replaced with corresponding character entities : 370 * '<' to '<' 371 * '>' to '>' 372 * '&' to '&' 373 * '"' to '"' 374 * "'" to "'" 375 * <p> 376 * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method. 377 * 378 * @param aText the text to escape 379 * @return the escaped text 380 */ 381 public static String escapeCharsForXML(String aText) { 382 final StringBuilder result = new StringBuilder(); 383 final StringCharacterIterator iterator = new StringCharacterIterator(aText); 384 char character = iterator.current(); 385 while (character != CharacterIterator.DONE) { 386 if (character == '<') { 387 result.append("<"); 388 } 389 else if (character == '>') { 390 result.append(">"); 391 } 392 else if (character == '\"') { 393 result.append("""); 394 } 395 else if (character == '\'') { 396 result.append("'"); 397 } 398 else if (character == '&') { 399 result.append("&"); 400 } 401 else { 402 // the char is not a special one 403 // add it to the result as is 404 result.append(character); 405 } 406 character = iterator.next(); 407 } 408 return result.toString(); 409 } 410 411 public static Element getSLAElement(Element elem) { 412 Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI)); 413 Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2)); 414 Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1; 415 416 return eSla; 417 } 418 419}