001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.util;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.text.CharacterIterator;
027import java.text.StringCharacterIterator;
028import java.util.Enumeration;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Properties;
033
034import javax.xml.XMLConstants;
035import javax.xml.parsers.DocumentBuilderFactory;
036import javax.xml.transform.Result;
037import javax.xml.transform.Source;
038import javax.xml.transform.Transformer;
039import javax.xml.transform.TransformerFactory;
040import javax.xml.transform.dom.DOMSource;
041import javax.xml.transform.stream.StreamResult;
042import javax.xml.transform.stream.StreamSource;
043import javax.xml.validation.Schema;
044import javax.xml.validation.SchemaFactory;
045import javax.xml.validation.Validator;
046
047import org.apache.hadoop.conf.Configuration;
048import org.apache.oozie.service.SchemaService;
049import org.apache.oozie.service.SchemaService.SchemaName;
050import org.apache.oozie.service.Services;
051import org.jdom.Comment;
052import org.jdom.Document;
053import org.jdom.Element;
054import org.jdom.JDOMException;
055import org.jdom.Namespace;
056import org.jdom.input.SAXBuilder;
057import org.jdom.output.Format;
058import org.jdom.output.XMLOutputter;
059import org.xml.sax.EntityResolver;
060import org.xml.sax.InputSource;
061import org.xml.sax.SAXException;
062
063/**
064 * XML utility methods.
065 */
066public class XmlUtils {
067
068    private static class NoExternalEntityEntityResolver implements EntityResolver {
069
070        @Override
071        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
072            return new InputSource(new ByteArrayInputStream(new byte[0]));
073        }
074
075    }
076
077    private static SAXBuilder createSAXBuilder() {
078        SAXBuilder saxBuilder = new SAXBuilder();
079
080        //THIS IS NOT WORKING
081        //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
082
083        //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
084        saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
085        return saxBuilder;
086    }
087
088    /**
089     * Remove comments from any Xml String.
090     *
091     * @param xmlStr XML string to remove comments.
092     * @return String after removing comments.
093     * @throws JDOMException thrown if an error happend while XML parsing.
094     */
095    public static String removeComments(String xmlStr) throws JDOMException {
096        if (xmlStr == null) {
097            return null;
098        }
099        try {
100            SAXBuilder saxBuilder = createSAXBuilder();
101            Document document = saxBuilder.build(new StringReader(xmlStr));
102            removeComments(document);
103            return prettyPrint(document.getRootElement()).toString();
104        }
105        catch (IOException ex) {
106            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
107        }
108    }
109
110    private static void removeComments(List l) {
111        for (Iterator i = l.iterator(); i.hasNext();) {
112            Object node = i.next();
113            if (node instanceof Comment) {
114                i.remove();
115            }
116            else {
117                if (node instanceof Element) {
118                    removeComments(((Element) node).getContent());
119                }
120            }
121        }
122    }
123
124    private static void removeComments(Document doc) {
125        removeComments(doc.getContent());
126    }
127
128    /**
129     * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
130     *
131     * @param xmlStr XML string to parse.
132     * @return JDOM element for the parsed XML string.
133     * @throws JDOMException thrown if an error happend while XML parsing.
134     */
135    public static Element parseXml(String xmlStr) throws JDOMException {
136        ParamChecker.notNull(xmlStr, "xmlStr");
137        try {
138            SAXBuilder saxBuilder = createSAXBuilder();
139            Document document = saxBuilder.build(new StringReader(xmlStr));
140            return document.getRootElement();
141        }
142        catch (IOException ex) {
143            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
144        }
145    }
146
147    /**
148     * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
149     *
150     * @param is inputstream to parse.
151     * @return JDOM element for the parsed XML string.
152     * @throws JDOMException thrown if an error happend while XML parsing.
153     * @throws IOException thrown if an IO error occurred.
154     */
155    public static Element parseXml(InputStream is) throws JDOMException, IOException {
156        ParamChecker.notNull(is, "is");
157        SAXBuilder saxBuilder = createSAXBuilder();
158        Document document = saxBuilder.build(is);
159        return document.getRootElement();
160    }
161
162    /**
163     * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
164     * document.
165     *
166     * @param filePath path of the XML document.
167     * @param attributeName attribute to retrieve value for.
168     * @return value of the specified attribute.
169     */
170    public static String getRootAttribute(String filePath, String attributeName) {
171        ParamChecker.notNull(filePath, "filePath");
172        ParamChecker.notNull(attributeName, "attributeName");
173        SAXBuilder saxBuilder = createSAXBuilder();
174        try {
175            Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
176            return doc.getRootElement().getAttributeValue(attributeName);
177        }
178        catch (JDOMException e) {
179            throw new RuntimeException();
180        }
181        catch (IOException e) {
182            throw new RuntimeException();
183        }
184    }
185
186    /**
187     * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
188     * {@link #toString} method is invoked.
189     */
190    public static class PrettyPrint {
191        private String str;
192        private Element element;
193
194        private PrettyPrint(String str) {
195            this.str = str;
196        }
197
198        private PrettyPrint(Element element) {
199            this.element = ParamChecker.notNull(element, "element");
200        }
201
202        /**
203         * Return the pretty print representation of an XML document.
204         *
205         * @return the pretty print representation of an XML document.
206         */
207        @Override
208        public String toString() {
209            if (str != null) {
210                return str;
211            }
212            else {
213                XMLOutputter outputter = new XMLOutputter();
214                StringWriter stringWriter = new StringWriter();
215                outputter.setFormat(Format.getPrettyFormat());
216                try {
217                    outputter.output(element, stringWriter);
218                }
219                catch (Exception ex) {
220                    throw new RuntimeException(ex);
221                }
222                return stringWriter.toString();
223            }
224        }
225    }
226
227    /**
228     * Return a pretty print string for a JDOM Element.
229     *
230     * @param element JDOM element.
231     * @return pretty print of the given JDOM Element.
232     */
233    public static PrettyPrint prettyPrint(Element element) {
234        return new PrettyPrint(element);
235
236    }
237
238    /**
239     * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
240     * string.
241     *
242     * @param xmlStr XML string.
243     * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
244     */
245    public static PrettyPrint prettyPrint(String xmlStr) {
246        try {
247            return new PrettyPrint(parseXml(xmlStr));
248        }
249        catch (Exception e) {
250            return new PrettyPrint(xmlStr);
251        }
252    }
253
254    /**
255     * Return a pretty print string for a Configuration object.
256     *
257     * @param conf Configuration object.
258     * @return prettyprint of the given Configuration object.
259     */
260    public static PrettyPrint prettyPrint(Configuration conf) {
261        Element root = new Element("configuration");
262        for (Map.Entry<String, String> entry : conf) {
263            Element property = new Element("property");
264            Element name = new Element("name");
265            name.setText(entry.getKey());
266            Element value = new Element("value");
267            value.setText(entry.getValue());
268            property.addContent(name);
269            property.addContent(value);
270            root.addContent(property);
271        }
272        return new PrettyPrint(root);
273    }
274
275    /**
276     * Schema validation for a given xml. <p>
277     *
278     * @param schema for validation
279     * @param xml to be validated
280     * @throws SAXException in case of validation error
281     * @throws IOException in case of IO error
282     */
283    public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
284
285        Validator validator = schema.newValidator();
286        validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
287    }
288
289    /**
290     * Create schema object for the given xsd
291     *
292     * @param is inputstream to schema.
293     * @return the schema object.
294     */
295    public static Schema createSchema(InputStream is) {
296        SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
297        StreamSource src = new StreamSource(is);
298        try {
299            return factory.newSchema(src);
300        }
301        catch (SAXException e) {
302            throw new RuntimeException(e.getMessage(), e);
303        }
304    }
305
306    public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
307        if (xmlData == null || xmlData.length() == 0) {
308            return;
309        }
310        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
311        validateXml(schema, xmlData);
312    }
313
314    /**
315     * Convert Properties to string
316     *
317     * @param props the properties to convert
318     * @return xml string
319     * @throws IOException if there is an error during conversion
320     */
321    public static String writePropToString(Properties props) throws IOException {
322        try {
323            org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
324            org.w3c.dom.Element conf = doc.createElement("configuration");
325            doc.appendChild(conf);
326            conf.appendChild(doc.createTextNode("\n"));
327            for (Enumeration e = props.keys(); e.hasMoreElements();) {
328                String name = (String) e.nextElement();
329                Object object = props.get(name);
330                String value;
331                if (object instanceof String) {
332                    value = (String) object;
333                }
334                else {
335                    continue;
336                }
337                org.w3c.dom.Element propNode = doc.createElement("property");
338                conf.appendChild(propNode);
339
340                org.w3c.dom.Element nameNode = doc.createElement("name");
341                nameNode.appendChild(doc.createTextNode(name.trim()));
342                propNode.appendChild(nameNode);
343
344                org.w3c.dom.Element valueNode = doc.createElement("value");
345                valueNode.appendChild(doc.createTextNode(value.trim()));
346                propNode.appendChild(valueNode);
347
348                conf.appendChild(doc.createTextNode("\n"));
349            }
350
351            Source source = new DOMSource(doc);
352            StringWriter stringWriter = new StringWriter();
353            Result result = new StreamResult(stringWriter);
354            TransformerFactory factory = TransformerFactory.newInstance();
355            factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
356            Transformer transformer = factory.newTransformer();
357            transformer.transform(source, result);
358
359            return stringWriter.getBuffer().toString();
360        }
361        catch (Exception e) {
362            throw new IOException(e);
363        }
364    }
365
366    /**
367     * Escape characters for text appearing as XML data, between tags.
368     * <p>
369     * The following characters are replaced with corresponding character entities :
370     * '&lt;' to '&lt;'
371     * '&gt;' to '&gt;'
372     * '&amp;' to '&amp;'
373     * '"' to '&quot;'
374     * "'" to "&#039;"
375     * <p>
376     * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
377     *
378     * @param aText the text to escape
379     * @return the escaped text
380     */
381    public static String escapeCharsForXML(String aText) {
382        final StringBuilder result = new StringBuilder();
383        final StringCharacterIterator iterator = new StringCharacterIterator(aText);
384        char character = iterator.current();
385        while (character != CharacterIterator.DONE) {
386            if (character == '<') {
387                result.append("&lt;");
388            }
389            else if (character == '>') {
390                result.append("&gt;");
391            }
392            else if (character == '\"') {
393                result.append("&quot;");
394            }
395            else if (character == '\'') {
396                result.append("&#039;");
397            }
398            else if (character == '&') {
399                result.append("&amp;");
400            }
401            else {
402                // the char is not a special one
403                // add it to the result as is
404                result.append(character);
405            }
406            character = iterator.next();
407        }
408        return result.toString();
409    }
410
411    public static Element getSLAElement(Element elem) {
412        Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
413        Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
414        Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
415
416        return eSla;
417    }
418
419}