001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.util;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.text.CharacterIterator;
027import java.text.StringCharacterIterator;
028import java.util.Enumeration;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Properties;
033
034import javax.xml.XMLConstants;
035import javax.xml.parsers.DocumentBuilder;
036import javax.xml.parsers.DocumentBuilderFactory;
037import javax.xml.parsers.ParserConfigurationException;
038import javax.xml.transform.Result;
039import javax.xml.transform.Source;
040import javax.xml.transform.Transformer;
041import javax.xml.transform.TransformerFactory;
042import javax.xml.transform.dom.DOMSource;
043import javax.xml.transform.stream.StreamResult;
044import javax.xml.transform.stream.StreamSource;
045import javax.xml.validation.Schema;
046import javax.xml.validation.SchemaFactory;
047import javax.xml.validation.Validator;
048
049import org.apache.hadoop.conf.Configuration;
050import org.apache.oozie.service.SchemaService;
051import org.apache.oozie.service.SchemaService.SchemaName;
052import org.apache.oozie.service.Services;
053import org.jdom.Comment;
054import org.jdom.Document;
055import org.jdom.Element;
056import org.jdom.JDOMException;
057import org.jdom.Namespace;
058import org.jdom.input.SAXBuilder;
059import org.jdom.output.Format;
060import org.jdom.output.XMLOutputter;
061import org.xml.sax.SAXException;
062
063/**
064 * XML utility methods.
065 */
066public class XmlUtils {
067
068    private static SAXBuilder createSAXBuilder() {
069        SAXBuilder saxBuilder = new SAXBuilder();
070        saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
071        saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
072        saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
073        saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
074        return saxBuilder;
075    }
076
077    /**
078     * Remove comments from any Xml String.
079     *
080     * @param xmlStr XML string to remove comments.
081     * @return String after removing comments.
082     * @throws JDOMException thrown if an error happend while XML parsing.
083     */
084    public static String removeComments(String xmlStr) throws JDOMException {
085        if (xmlStr == null) {
086            return null;
087        }
088        try {
089            SAXBuilder saxBuilder = createSAXBuilder();
090            Document document = saxBuilder.build(new StringReader(xmlStr));
091            removeComments(document);
092            return prettyPrint(document.getRootElement()).toString();
093        }
094        catch (IOException ex) {
095            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
096        }
097    }
098
099    private static void removeComments(List l) {
100        for (Iterator i = l.iterator(); i.hasNext();) {
101            Object node = i.next();
102            if (node instanceof Comment) {
103                i.remove();
104            }
105            else {
106                if (node instanceof Element) {
107                    removeComments(((Element) node).getContent());
108                }
109            }
110        }
111    }
112
113    private static void removeComments(Document doc) {
114        removeComments(doc.getContent());
115    }
116
117    /**
118     * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
119     *
120     * @param xmlStr XML string to parse.
121     * @return JDOM element for the parsed XML string.
122     * @throws JDOMException thrown if an error happend while XML parsing.
123     */
124    public static Element parseXml(String xmlStr) throws JDOMException {
125        ParamChecker.notNull(xmlStr, "xmlStr");
126        try {
127            SAXBuilder saxBuilder = createSAXBuilder();
128            Document document = saxBuilder.build(new StringReader(xmlStr));
129            return document.getRootElement();
130        }
131        catch (IOException ex) {
132            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
133        }
134    }
135
136    /**
137     * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
138     *
139     * @param is inputstream to parse.
140     * @return JDOM element for the parsed XML string.
141     * @throws JDOMException thrown if an error happend while XML parsing.
142     * @throws IOException thrown if an IO error occurred.
143     */
144    public static Element parseXml(InputStream is) throws JDOMException, IOException {
145        ParamChecker.notNull(is, "is");
146        SAXBuilder saxBuilder = createSAXBuilder();
147        Document document = saxBuilder.build(is);
148        return document.getRootElement();
149    }
150
151    /**
152     * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
153     * document.
154     *
155     * @param filePath path of the XML document.
156     * @param attributeName attribute to retrieve value for.
157     * @return value of the specified attribute.
158     */
159    public static String getRootAttribute(String filePath, String attributeName) {
160        ParamChecker.notNull(filePath, "filePath");
161        ParamChecker.notNull(attributeName, "attributeName");
162        SAXBuilder saxBuilder = createSAXBuilder();
163        try {
164            Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
165            return doc.getRootElement().getAttributeValue(attributeName);
166        }
167        catch (JDOMException e) {
168            throw new RuntimeException();
169        }
170        catch (IOException e) {
171            throw new RuntimeException();
172        }
173    }
174
175    /**
176     * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
177     * {@link #toString} method is invoked.
178     */
179    public static class PrettyPrint {
180        private String str;
181        private Element element;
182
183        private PrettyPrint(String str) {
184            this.str = str;
185        }
186
187        private PrettyPrint(Element element) {
188            this.element = ParamChecker.notNull(element, "element");
189        }
190
191        /**
192         * Return the pretty print representation of an XML document.
193         *
194         * @return the pretty print representation of an XML document.
195         */
196        @Override
197        public String toString() {
198            if (str != null) {
199                return str;
200            }
201            else {
202                XMLOutputter outputter = new XMLOutputter();
203                StringWriter stringWriter = new StringWriter();
204                outputter.setFormat(Format.getPrettyFormat());
205                try {
206                    outputter.output(element, stringWriter);
207                }
208                catch (Exception ex) {
209                    throw new RuntimeException(ex);
210                }
211                return stringWriter.toString();
212            }
213        }
214    }
215
216    /**
217     * Return a pretty print string for a JDOM Element.
218     *
219     * @param element JDOM element.
220     * @return pretty print of the given JDOM Element.
221     */
222    public static PrettyPrint prettyPrint(Element element) {
223        return new PrettyPrint(element);
224
225    }
226
227    /**
228     * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
229     * string.
230     *
231     * @param xmlStr XML string.
232     * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
233     */
234    public static PrettyPrint prettyPrint(String xmlStr) {
235        try {
236            return new PrettyPrint(parseXml(xmlStr));
237        }
238        catch (Exception e) {
239            return new PrettyPrint(xmlStr);
240        }
241    }
242
243    /**
244     * Return a pretty print string for a Configuration object.
245     *
246     * @param conf Configuration object.
247     * @return prettyprint of the given Configuration object.
248     */
249    public static PrettyPrint prettyPrint(Configuration conf) {
250        Element root = new Element("configuration");
251        for (Map.Entry<String, String> entry : conf) {
252            Element property = new Element("property");
253            Element name = new Element("name");
254            name.setText(entry.getKey());
255            Element value = new Element("value");
256            value.setText(entry.getValue());
257            property.addContent(name);
258            property.addContent(value);
259            root.addContent(property);
260        }
261        return new PrettyPrint(root);
262    }
263
264    /**
265     * Schema validation for a given xml. <p>
266     *
267     * @param schema for validation
268     * @param xml to be validated
269     */
270    public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
271        Validator validator = SchemaService.getValidator(schema);
272        validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
273    }
274
275    public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
276        if (xmlData == null || xmlData.length() == 0) {
277            return;
278        }
279        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
280        validateXml(schema, xmlData);
281    }
282
283    /**
284     * Convert Properties to string
285     *
286     * @param props
287     * @return xml string
288     * @throws IOException
289     */
290    public static String writePropToString(Properties props) throws IOException {
291        try {
292            org.w3c.dom.Document doc = getDocumentBuilder().newDocument();
293            org.w3c.dom.Element conf = doc.createElement("configuration");
294            doc.appendChild(conf);
295            conf.appendChild(doc.createTextNode("\n"));
296            for (Enumeration e = props.keys(); e.hasMoreElements();) {
297                String name = (String) e.nextElement();
298                Object object = props.get(name);
299                String value;
300                if (object instanceof String) {
301                    value = (String) object;
302                }
303                else {
304                    continue;
305                }
306                org.w3c.dom.Element propNode = doc.createElement("property");
307                conf.appendChild(propNode);
308
309                org.w3c.dom.Element nameNode = doc.createElement("name");
310                nameNode.appendChild(doc.createTextNode(name.trim()));
311                propNode.appendChild(nameNode);
312
313                org.w3c.dom.Element valueNode = doc.createElement("value");
314                valueNode.appendChild(doc.createTextNode(value.trim()));
315                propNode.appendChild(valueNode);
316
317                conf.appendChild(doc.createTextNode("\n"));
318            }
319
320            Source source = new DOMSource(doc);
321            StringWriter stringWriter = new StringWriter();
322            Result result = new StreamResult(stringWriter);
323            TransformerFactory factory = TransformerFactory.newInstance();
324            factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
325            Transformer transformer = factory.newTransformer();
326            transformer.transform(source, result);
327
328            return stringWriter.getBuffer().toString();
329        }
330        catch (Exception e) {
331            throw new IOException(e);
332        }
333    }
334
335    /**
336     * Returns a DocumentBuilder
337     * @return DocumentBuilder
338     * @throws ParserConfigurationException
339     */
340    private static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
341        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
342        docBuilderFactory.setNamespaceAware(true);
343        docBuilderFactory.setXIncludeAware(false);
344        docBuilderFactory.setExpandEntityReferences(false);
345        docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
346        //Redundant with disallow-doctype, but just in case
347        docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
348        docBuilderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
349        docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
350        // ignore all comments inside the xml file
351        docBuilderFactory.setIgnoringComments(true);
352        return docBuilderFactory.newDocumentBuilder();
353    }
354
355    /**
356     * Escape characters for text appearing as XML data, between tags.
357     * <p>
358     * The following characters are replaced with corresponding character entities :
359     * '&lt;' to '&lt;'
360     * '&gt;' to '&gt;'
361     * '&amp;' to '&amp;'
362     * '"' to '&quot;'
363     * "'" to "&#039;"
364     * <p>
365     * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
366     */
367    public static String escapeCharsForXML(String aText) {
368        final StringBuilder result = new StringBuilder();
369        final StringCharacterIterator iterator = new StringCharacterIterator(aText);
370        char character = iterator.current();
371        while (character != CharacterIterator.DONE) {
372            if (character == '<') {
373                result.append("&lt;");
374            }
375            else if (character == '>') {
376                result.append("&gt;");
377            }
378            else if (character == '\"') {
379                result.append("&quot;");
380            }
381            else if (character == '\'') {
382                result.append("&#039;");
383            }
384            else if (character == '&') {
385                result.append("&amp;");
386            }
387            else {
388                // the char is not a special one
389                // add it to the result as is
390                result.append(character);
391            }
392            character = iterator.next();
393        }
394        return result.toString();
395    }
396
397    public static Element getSLAElement(Element elem) {
398        Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
399        Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
400        Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
401
402        return eSla;
403    }
404
405}