001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.oozie.util;
019
020import java.io.ByteArrayInputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.StringReader;
024import java.io.StringWriter;
025import java.text.CharacterIterator;
026import java.text.StringCharacterIterator;
027import java.util.Enumeration;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Properties;
032
033import javax.xml.XMLConstants;
034import javax.xml.parsers.DocumentBuilderFactory;
035import javax.xml.transform.Result;
036import javax.xml.transform.Source;
037import javax.xml.transform.Transformer;
038import javax.xml.transform.TransformerFactory;
039import javax.xml.transform.dom.DOMSource;
040import javax.xml.transform.stream.StreamResult;
041import javax.xml.transform.stream.StreamSource;
042import javax.xml.validation.Schema;
043import javax.xml.validation.SchemaFactory;
044import javax.xml.validation.Validator;
045
046import org.apache.hadoop.conf.Configuration;
047import org.apache.oozie.service.SchemaService;
048import org.apache.oozie.service.SchemaService.SchemaName;
049import org.apache.oozie.service.Services;
050import org.jdom.Comment;
051import org.jdom.Document;
052import org.jdom.Element;
053import org.jdom.JDOMException;
054import org.jdom.Namespace;
055import org.jdom.input.SAXBuilder;
056import org.jdom.output.Format;
057import org.jdom.output.XMLOutputter;
058import org.xml.sax.EntityResolver;
059import org.xml.sax.InputSource;
060import org.xml.sax.SAXException;
061
062/**
063 * XML utility methods.
064 */
065public class XmlUtils {
066
067    private static class NoExternalEntityEntityResolver implements EntityResolver {
068
069        @Override
070        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
071            return new InputSource(new ByteArrayInputStream(new byte[0]));
072        }
073
074    }
075
076    private static SAXBuilder createSAXBuilder() {
077        SAXBuilder saxBuilder = new SAXBuilder();
078
079        //THIS IS NOT WORKING
080        //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
081
082        //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
083        saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
084        return saxBuilder;
085    }
086
087    /**
088     * Remove comments from any Xml String.
089     *
090     * @param xmlStr XML string to remove comments.
091     * @return String after removing comments.
092     * @throws JDOMException thrown if an error happend while XML parsing.
093     */
094    public static String removeComments(String xmlStr) throws JDOMException {
095        if (xmlStr == null) {
096            return null;
097        }
098        try {
099            SAXBuilder saxBuilder = createSAXBuilder();
100            Document document = saxBuilder.build(new StringReader(xmlStr));
101            removeComments(document);
102            return prettyPrint(document.getRootElement()).toString();
103        }
104        catch (IOException ex) {
105            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
106        }
107    }
108
109    private static void removeComments(List l) {
110        for (Iterator i = l.iterator(); i.hasNext();) {
111            Object node = i.next();
112            if (node instanceof Comment) {
113                i.remove();
114            }
115            else {
116                if (node instanceof Element) {
117                    removeComments(((Element) node).getContent());
118                }
119            }
120        }
121    }
122
123    private static void removeComments(Document doc) {
124        removeComments(doc.getContent());
125    }
126
127    /**
128     * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
129     *
130     * @param xmlStr XML string to parse.
131     * @return JDOM element for the parsed XML string.
132     * @throws JDOMException thrown if an error happend while XML parsing.
133     */
134    public static Element parseXml(String xmlStr) throws JDOMException {
135        ParamChecker.notNull(xmlStr, "xmlStr");
136        try {
137            SAXBuilder saxBuilder = createSAXBuilder();
138            Document document = saxBuilder.build(new StringReader(xmlStr));
139            return document.getRootElement();
140        }
141        catch (IOException ex) {
142            throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
143        }
144    }
145
146    /**
147     * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
148     *
149     * @param is inputstream to parse.
150     * @return JDOM element for the parsed XML string.
151     * @throws JDOMException thrown if an error happend while XML parsing.
152     * @throws IOException thrown if an IO error occurred.
153     */
154    public static Element parseXml(InputStream is) throws JDOMException, IOException {
155        ParamChecker.notNull(is, "is");
156        SAXBuilder saxBuilder = createSAXBuilder();
157        Document document = saxBuilder.build(is);
158        return document.getRootElement();
159    }
160
161    /**
162     * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
163     * document.
164     *
165     * @param filePath path of the XML document.
166     * @param attributeName attribute to retrieve value for.
167     * @return value of the specified attribute.
168     */
169    public static String getRootAttribute(String filePath, String attributeName) {
170        ParamChecker.notNull(filePath, "filePath");
171        ParamChecker.notNull(attributeName, "attributeName");
172        SAXBuilder saxBuilder = createSAXBuilder();
173        try {
174            Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
175            return doc.getRootElement().getAttributeValue(attributeName);
176        }
177        catch (JDOMException e) {
178            throw new RuntimeException();
179        }
180        catch (IOException e) {
181            throw new RuntimeException();
182        }
183    }
184
185    /**
186     * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
187     * {@link #toString} method is invoked.
188     */
189    public static class PrettyPrint {
190        private String str;
191        private Element element;
192
193        private PrettyPrint(String str) {
194            this.str = str;
195        }
196
197        private PrettyPrint(Element element) {
198            this.element = ParamChecker.notNull(element, "element");
199        }
200
201        /**
202         * Return the pretty print representation of an XML document.
203         *
204         * @return the pretty print representation of an XML document.
205         */
206        @Override
207        public String toString() {
208            if (str != null) {
209                return str;
210            }
211            else {
212                XMLOutputter outputter = new XMLOutputter();
213                StringWriter stringWriter = new StringWriter();
214                outputter.setFormat(Format.getPrettyFormat());
215                try {
216                    outputter.output(element, stringWriter);
217                }
218                catch (Exception ex) {
219                    throw new RuntimeException(ex);
220                }
221                return stringWriter.toString();
222            }
223        }
224    }
225
226    /**
227     * Return a pretty print string for a JDOM Element.
228     *
229     * @param element JDOM element.
230     * @return pretty print of the given JDOM Element.
231     */
232    public static PrettyPrint prettyPrint(Element element) {
233        return new PrettyPrint(element);
234
235    }
236
237    /**
238     * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
239     * string.
240     *
241     * @param xmlStr XML string.
242     * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
243     */
244    public static PrettyPrint prettyPrint(String xmlStr) {
245        try {
246            return new PrettyPrint(parseXml(xmlStr));
247        }
248        catch (Exception e) {
249            return new PrettyPrint(xmlStr);
250        }
251    }
252
253    /**
254     * Return a pretty print string for a Configuration object.
255     *
256     * @param conf Configuration object.
257     * @return prettyprint of the given Configuration object.
258     */
259    public static PrettyPrint prettyPrint(Configuration conf) {
260        Element root = new Element("configuration");
261        for (Map.Entry<String, String> entry : conf) {
262            Element property = new Element("property");
263            Element name = new Element("name");
264            name.setText(entry.getKey());
265            Element value = new Element("value");
266            value.setText(entry.getValue());
267            property.addContent(name);
268            property.addContent(value);
269            root.addContent(property);
270        }
271        return new PrettyPrint(root);
272    }
273
274    /**
275     * Schema validation for a given xml. <p/>
276     *
277     * @param schema for validation
278     * @param xml to be validated
279     */
280    public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
281
282        Validator validator = schema.newValidator();
283        validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
284    }
285
286    /**
287     * Create schema object for the given xsd
288     *
289     * @param is inputstream to schema.
290     * @return the schema object.
291     */
292    public static Schema createSchema(InputStream is) {
293        SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
294        StreamSource src = new StreamSource(is);
295        try {
296            return factory.newSchema(src);
297        }
298        catch (SAXException e) {
299            throw new RuntimeException(e.getMessage(), e);
300        }
301    }
302
303    public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
304        if (xmlData == null || xmlData.length() == 0) {
305            return;
306        }
307        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
308        validateXml(schema, xmlData);
309    }
310
311    /**
312     * Convert Properties to string
313     *
314     * @param props
315     * @return xml string
316     * @throws IOException
317     */
318    public static String writePropToString(Properties props) throws IOException {
319        try {
320            org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
321            org.w3c.dom.Element conf = doc.createElement("configuration");
322            doc.appendChild(conf);
323            conf.appendChild(doc.createTextNode("\n"));
324            for (Enumeration e = props.keys(); e.hasMoreElements();) {
325                String name = (String) e.nextElement();
326                Object object = props.get(name);
327                String value;
328                if (object instanceof String) {
329                    value = (String) object;
330                }
331                else {
332                    continue;
333                }
334                org.w3c.dom.Element propNode = doc.createElement("property");
335                conf.appendChild(propNode);
336
337                org.w3c.dom.Element nameNode = doc.createElement("name");
338                nameNode.appendChild(doc.createTextNode(name.trim()));
339                propNode.appendChild(nameNode);
340
341                org.w3c.dom.Element valueNode = doc.createElement("value");
342                valueNode.appendChild(doc.createTextNode(value.trim()));
343                propNode.appendChild(valueNode);
344
345                conf.appendChild(doc.createTextNode("\n"));
346            }
347
348            Source source = new DOMSource(doc);
349            StringWriter stringWriter = new StringWriter();
350            Result result = new StreamResult(stringWriter);
351            TransformerFactory factory = TransformerFactory.newInstance();
352            Transformer transformer = factory.newTransformer();
353            transformer.transform(source, result);
354
355            return stringWriter.getBuffer().toString();
356        }
357        catch (Exception e) {
358            throw new IOException(e);
359        }
360    }
361
362    /**
363     * Escape characters for text appearing as XML data, between tags.
364     * <P/>
365     * The following characters are replaced with corresponding character entities :
366     * '<' to '&lt';
367     * '>' to '&gt';
368     * '&' to '&amp;'
369     * '"' to '&quot;'
370     * "'" to "&#039;"
371     * <P/>
372     * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
373     */
374    public static String escapeCharsForXML(String aText) {
375        final StringBuilder result = new StringBuilder();
376        final StringCharacterIterator iterator = new StringCharacterIterator(aText);
377        char character = iterator.current();
378        while (character != CharacterIterator.DONE) {
379            if (character == '<') {
380                result.append("&lt;");
381            }
382            else if (character == '>') {
383                result.append("&gt;");
384            }
385            else if (character == '\"') {
386                result.append("&quot;");
387            }
388            else if (character == '\'') {
389                result.append("&#039;");
390            }
391            else if (character == '&') {
392                result.append("&amp;");
393            }
394            else {
395                // the char is not a special one
396                // add it to the result as is
397                result.append(character);
398            }
399            character = iterator.next();
400        }
401        return result.toString();
402    }
403
404    public static Element getSLAElement(Element elem) {
405        Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
406        Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
407        Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
408
409        return eSla;
410    }
411
412}