001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.util;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.text.CharacterIterator;
027import java.text.StringCharacterIterator;
028import java.util.Enumeration;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Properties;
033
034import javax.xml.XMLConstants;
035import javax.xml.parsers.DocumentBuilderFactory;
036import javax.xml.transform.Result;
037import javax.xml.transform.Source;
038import javax.xml.transform.Transformer;
039import javax.xml.transform.TransformerFactory;
040import javax.xml.transform.dom.DOMSource;
041import javax.xml.transform.stream.StreamResult;
042import javax.xml.transform.stream.StreamSource;
043import javax.xml.validation.Schema;
044import javax.xml.validation.SchemaFactory;
045import javax.xml.validation.Validator;
046
047import org.apache.hadoop.conf.Configuration;
048import org.apache.oozie.service.SchemaService;
049import org.apache.oozie.service.SchemaService.SchemaName;
050import org.apache.oozie.service.Services;
051import org.jdom.Comment;
052import org.jdom.Document;
053import org.jdom.Element;
054import org.jdom.JDOMException;
055import org.jdom.Namespace;
056import org.jdom.input.SAXBuilder;
057import org.jdom.output.Format;
058import org.jdom.output.XMLOutputter;
059import org.xml.sax.EntityResolver;
060import org.xml.sax.InputSource;
061import org.xml.sax.SAXException;
062
063/**
064 * XML utility methods.
065 */
066public class XmlUtils {
067
068    private static class NoExternalEntityEntityResolver implements EntityResolver {
069
070        @Override
071        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
072            return new InputSource(new ByteArrayInputStream(new byte[0]));
073        }
074
075    }
076
077    private static SAXBuilder createSAXBuilder() {
078        SAXBuilder saxBuilder = new SAXBuilder();
079
080        //THIS IS NOT WORKING
081        //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
082
083        //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
084        saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
085        return saxBuilder;
086    }
087
088    /**
089     * Remove comments from any Xml String.
090     *
091     * @param xmlStr XML string to remove comments.
092     * @return String after removing comments.
093     * @throws JDOMException thrown if an error happend while XML parsing.
094     */
095    public static String removeComments(String xmlStr) throws JDOMException {
096        if (xmlStr == null) {
097            return null;
098        }
099        try {
100            SAXBuilder saxBuilder = createSAXBuilder();
101            Document document = saxBuilder.build(new StringReader(xmlStr));
102            removeComments(document);
103            return prettyPrint(document.getRootElement()).toString();
104        }
105        catch (IOException ex) {
106            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
107        }
108    }
109
110    private static void removeComments(List l) {
111        for (Iterator i = l.iterator(); i.hasNext();) {
112            Object node = i.next();
113            if (node instanceof Comment) {
114                i.remove();
115            }
116            else {
117                if (node instanceof Element) {
118                    removeComments(((Element) node).getContent());
119                }
120            }
121        }
122    }
123
124    private static void removeComments(Document doc) {
125        removeComments(doc.getContent());
126    }
127
128    /**
129     * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
130     *
131     * @param xmlStr XML string to parse.
132     * @return JDOM element for the parsed XML string.
133     * @throws JDOMException thrown if an error happend while XML parsing.
134     */
135    public static Element parseXml(String xmlStr) throws JDOMException {
136        ParamChecker.notNull(xmlStr, "xmlStr");
137        try {
138            SAXBuilder saxBuilder = createSAXBuilder();
139            Document document = saxBuilder.build(new StringReader(xmlStr));
140            return document.getRootElement();
141        }
142        catch (IOException ex) {
143            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
144        }
145    }
146
147    /**
148     * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
149     *
150     * @param is inputstream to parse.
151     * @return JDOM element for the parsed XML string.
152     * @throws JDOMException thrown if an error happend while XML parsing.
153     * @throws IOException thrown if an IO error occurred.
154     */
155    public static Element parseXml(InputStream is) throws JDOMException, IOException {
156        ParamChecker.notNull(is, "is");
157        SAXBuilder saxBuilder = createSAXBuilder();
158        Document document = saxBuilder.build(is);
159        return document.getRootElement();
160    }
161
162    /**
163     * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
164     * document.
165     *
166     * @param filePath path of the XML document.
167     * @param attributeName attribute to retrieve value for.
168     * @return value of the specified attribute.
169     */
170    public static String getRootAttribute(String filePath, String attributeName) {
171        ParamChecker.notNull(filePath, "filePath");
172        ParamChecker.notNull(attributeName, "attributeName");
173        SAXBuilder saxBuilder = createSAXBuilder();
174        try {
175            Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
176            return doc.getRootElement().getAttributeValue(attributeName);
177        }
178        catch (JDOMException e) {
179            throw new RuntimeException();
180        }
181        catch (IOException e) {
182            throw new RuntimeException();
183        }
184    }
185
186    /**
187     * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
188     * {@link #toString} method is invoked.
189     */
190    public static class PrettyPrint {
191        private String str;
192        private Element element;
193
194        private PrettyPrint(String str) {
195            this.str = str;
196        }
197
198        private PrettyPrint(Element element) {
199            this.element = ParamChecker.notNull(element, "element");
200        }
201
202        /**
203         * Return the pretty print representation of an XML document.
204         *
205         * @return the pretty print representation of an XML document.
206         */
207        @Override
208        public String toString() {
209            if (str != null) {
210                return str;
211            }
212            else {
213                XMLOutputter outputter = new XMLOutputter();
214                StringWriter stringWriter = new StringWriter();
215                outputter.setFormat(Format.getPrettyFormat());
216                try {
217                    outputter.output(element, stringWriter);
218                }
219                catch (Exception ex) {
220                    throw new RuntimeException(ex);
221                }
222                return stringWriter.toString();
223            }
224        }
225    }
226
227    /**
228     * Return a pretty print string for a JDOM Element.
229     *
230     * @param element JDOM element.
231     * @return pretty print of the given JDOM Element.
232     */
233    public static PrettyPrint prettyPrint(Element element) {
234        return new PrettyPrint(element);
235
236    }
237
238    /**
239     * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
240     * string.
241     *
242     * @param xmlStr XML string.
243     * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
244     */
245    public static PrettyPrint prettyPrint(String xmlStr) {
246        try {
247            return new PrettyPrint(parseXml(xmlStr));
248        }
249        catch (Exception e) {
250            return new PrettyPrint(xmlStr);
251        }
252    }
253
254    /**
255     * Return a pretty print string for a Configuration object.
256     *
257     * @param conf Configuration object.
258     * @return prettyprint of the given Configuration object.
259     */
260    public static PrettyPrint prettyPrint(Configuration conf) {
261        Element root = new Element("configuration");
262        for (Map.Entry<String, String> entry : conf) {
263            Element property = new Element("property");
264            Element name = new Element("name");
265            name.setText(entry.getKey());
266            Element value = new Element("value");
267            value.setText(entry.getValue());
268            property.addContent(name);
269            property.addContent(value);
270            root.addContent(property);
271        }
272        return new PrettyPrint(root);
273    }
274
275    /**
276     * Schema validation for a given xml. <p>
277     *
278     * @param schema for validation
279     * @param xml to be validated
280     */
281    public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
282
283        Validator validator = schema.newValidator();
284        validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
285    }
286
287    /**
288     * Create schema object for the given xsd
289     *
290     * @param is inputstream to schema.
291     * @return the schema object.
292     */
293    public static Schema createSchema(InputStream is) {
294        SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
295        StreamSource src = new StreamSource(is);
296        try {
297            return factory.newSchema(src);
298        }
299        catch (SAXException e) {
300            throw new RuntimeException(e.getMessage(), e);
301        }
302    }
303
304    public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
305        if (xmlData == null || xmlData.length() == 0) {
306            return;
307        }
308        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
309        validateXml(schema, xmlData);
310    }
311
312    /**
313     * Convert Properties to string
314     *
315     * @param props
316     * @return xml string
317     * @throws IOException
318     */
319    public static String writePropToString(Properties props) throws IOException {
320        try {
321            org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
322            org.w3c.dom.Element conf = doc.createElement("configuration");
323            doc.appendChild(conf);
324            conf.appendChild(doc.createTextNode("\n"));
325            for (Enumeration e = props.keys(); e.hasMoreElements();) {
326                String name = (String) e.nextElement();
327                Object object = props.get(name);
328                String value;
329                if (object instanceof String) {
330                    value = (String) object;
331                }
332                else {
333                    continue;
334                }
335                org.w3c.dom.Element propNode = doc.createElement("property");
336                conf.appendChild(propNode);
337
338                org.w3c.dom.Element nameNode = doc.createElement("name");
339                nameNode.appendChild(doc.createTextNode(name.trim()));
340                propNode.appendChild(nameNode);
341
342                org.w3c.dom.Element valueNode = doc.createElement("value");
343                valueNode.appendChild(doc.createTextNode(value.trim()));
344                propNode.appendChild(valueNode);
345
346                conf.appendChild(doc.createTextNode("\n"));
347            }
348
349            Source source = new DOMSource(doc);
350            StringWriter stringWriter = new StringWriter();
351            Result result = new StreamResult(stringWriter);
352            TransformerFactory factory = TransformerFactory.newInstance();
353            factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
354            Transformer transformer = factory.newTransformer();
355            transformer.transform(source, result);
356
357            return stringWriter.getBuffer().toString();
358        }
359        catch (Exception e) {
360            throw new IOException(e);
361        }
362    }
363
364    /**
365     * Escape characters for text appearing as XML data, between tags.
366     * <p>
367     * The following characters are replaced with corresponding character entities :
368     * '&lt;' to '&lt;'
369     * '&gt;' to '&gt;'
370     * '&amp;' to '&amp;'
371     * '"' to '&quot;'
372     * "'" to "&#039;"
373     * <p>
374     * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
375     */
376    public static String escapeCharsForXML(String aText) {
377        final StringBuilder result = new StringBuilder();
378        final StringCharacterIterator iterator = new StringCharacterIterator(aText);
379        char character = iterator.current();
380        while (character != CharacterIterator.DONE) {
381            if (character == '<') {
382                result.append("&lt;");
383            }
384            else if (character == '>') {
385                result.append("&gt;");
386            }
387            else if (character == '\"') {
388                result.append("&quot;");
389            }
390            else if (character == '\'') {
391                result.append("&#039;");
392            }
393            else if (character == '&') {
394                result.append("&amp;");
395            }
396            else {
397                // the char is not a special one
398                // add it to the result as is
399                result.append(character);
400            }
401            character = iterator.next();
402        }
403        return result.toString();
404    }
405
406    public static Element getSLAElement(Element elem) {
407        Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
408        Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
409        Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
410
411        return eSla;
412    }
413
414}