001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *      http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.oozie.util;
019    
020    import java.io.ByteArrayInputStream;
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.io.StringReader;
024    import java.io.StringWriter;
025    import java.text.CharacterIterator;
026    import java.text.StringCharacterIterator;
027    import java.util.Enumeration;
028    import java.util.Iterator;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Properties;
032    
033    import javax.xml.XMLConstants;
034    import javax.xml.parsers.DocumentBuilderFactory;
035    import javax.xml.transform.Result;
036    import javax.xml.transform.Source;
037    import javax.xml.transform.Transformer;
038    import javax.xml.transform.TransformerFactory;
039    import javax.xml.transform.dom.DOMSource;
040    import javax.xml.transform.stream.StreamResult;
041    import javax.xml.transform.stream.StreamSource;
042    import javax.xml.validation.Schema;
043    import javax.xml.validation.SchemaFactory;
044    import javax.xml.validation.Validator;
045    
046    import org.apache.hadoop.conf.Configuration;
047    import org.apache.oozie.service.SchemaService;
048    import org.apache.oozie.service.SchemaService.SchemaName;
049    import org.apache.oozie.service.Services;
050    import org.jdom.Comment;
051    import org.jdom.Document;
052    import org.jdom.Element;
053    import org.jdom.JDOMException;
054    import org.jdom.Namespace;
055    import org.jdom.input.SAXBuilder;
056    import org.jdom.output.Format;
057    import org.jdom.output.XMLOutputter;
058    import org.xml.sax.EntityResolver;
059    import org.xml.sax.InputSource;
060    import org.xml.sax.SAXException;
061    
062    /**
063     * XML utility methods.
064     */
065    public class XmlUtils {
066    
067        private static class NoExternalEntityEntityResolver implements EntityResolver {
068    
069            @Override
070            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
071                return new InputSource(new ByteArrayInputStream(new byte[0]));
072            }
073    
074        }
075    
076        private static SAXBuilder createSAXBuilder() {
077            SAXBuilder saxBuilder = new SAXBuilder();
078    
079            //THIS IS NOT WORKING
080            //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
081    
082            //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
083            saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
084            return saxBuilder;
085        }
086    
087        /**
088         * Remove comments from any Xml String.
089         *
090         * @param xmlStr XML string to remove comments.
091         * @return String after removing comments.
092         * @throws JDOMException thrown if an error happend while XML parsing.
093         */
094        public static String removeComments(String xmlStr) throws JDOMException {
095            if (xmlStr == null) {
096                return null;
097            }
098            try {
099                SAXBuilder saxBuilder = createSAXBuilder();
100                Document document = saxBuilder.build(new StringReader(xmlStr));
101                removeComments(document);
102                return prettyPrint(document.getRootElement()).toString();
103            }
104            catch (IOException ex) {
105                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
106            }
107        }
108    
109        private static void removeComments(List l) {
110            for (Iterator i = l.iterator(); i.hasNext();) {
111                Object node = i.next();
112                if (node instanceof Comment) {
113                    i.remove();
114                }
115                else {
116                    if (node instanceof Element) {
117                        removeComments(((Element) node).getContent());
118                    }
119                }
120            }
121        }
122    
123        private static void removeComments(Document doc) {
124            removeComments(doc.getContent());
125        }
126    
127        /**
128         * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
129         *
130         * @param xmlStr XML string to parse.
131         * @return JDOM element for the parsed XML string.
132         * @throws JDOMException thrown if an error happend while XML parsing.
133         */
134        public static Element parseXml(String xmlStr) throws JDOMException {
135            ParamChecker.notNull(xmlStr, "xmlStr");
136            try {
137                SAXBuilder saxBuilder = createSAXBuilder();
138                Document document = saxBuilder.build(new StringReader(xmlStr));
139                return document.getRootElement();
140            }
141            catch (IOException ex) {
142                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
143            }
144        }
145    
146        /**
147         * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
148         *
149         * @param is inputstream to parse.
150         * @return JDOM element for the parsed XML string.
151         * @throws JDOMException thrown if an error happend while XML parsing.
152         * @throws IOException thrown if an IO error occurred.
153         */
154        public static Element parseXml(InputStream is) throws JDOMException, IOException {
155            ParamChecker.notNull(is, "is");
156            SAXBuilder saxBuilder = createSAXBuilder();
157            Document document = saxBuilder.build(is);
158            return document.getRootElement();
159        }
160    
161        /**
162         * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
163         * document.
164         *
165         * @param filePath path of the XML document.
166         * @param attributeName attribute to retrieve value for.
167         * @return value of the specified attribute.
168         */
169        public static String getRootAttribute(String filePath, String attributeName) {
170            ParamChecker.notNull(filePath, "filePath");
171            ParamChecker.notNull(attributeName, "attributeName");
172            SAXBuilder saxBuilder = createSAXBuilder();
173            try {
174                Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
175                return doc.getRootElement().getAttributeValue(attributeName);
176            }
177            catch (JDOMException e) {
178                throw new RuntimeException();
179            }
180            catch (IOException e) {
181                throw new RuntimeException();
182            }
183        }
184    
185        /**
186         * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
187         * {@link #toString} method is invoked.
188         */
189        public static class PrettyPrint {
190            private String str;
191            private Element element;
192    
193            private PrettyPrint(String str) {
194                this.str = str;
195            }
196    
197            private PrettyPrint(Element element) {
198                this.element = ParamChecker.notNull(element, "element");
199            }
200    
201            /**
202             * Return the pretty print representation of an XML document.
203             *
204             * @return the pretty print representation of an XML document.
205             */
206            @Override
207            public String toString() {
208                if (str != null) {
209                    return str;
210                }
211                else {
212                    XMLOutputter outputter = new XMLOutputter();
213                    StringWriter stringWriter = new StringWriter();
214                    outputter.setFormat(Format.getPrettyFormat());
215                    try {
216                        outputter.output(element, stringWriter);
217                    }
218                    catch (Exception ex) {
219                        throw new RuntimeException(ex);
220                    }
221                    return stringWriter.toString();
222                }
223            }
224        }
225    
226        /**
227         * Return a pretty print string for a JDOM Element.
228         *
229         * @param element JDOM element.
230         * @return pretty print of the given JDOM Element.
231         */
232        public static PrettyPrint prettyPrint(Element element) {
233            return new PrettyPrint(element);
234    
235        }
236    
237        /**
238         * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
239         * string.
240         *
241         * @param xmlStr XML string.
242         * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
243         */
244        public static PrettyPrint prettyPrint(String xmlStr) {
245            try {
246                return new PrettyPrint(parseXml(xmlStr));
247            }
248            catch (Exception e) {
249                return new PrettyPrint(xmlStr);
250            }
251        }
252    
253        /**
254         * Return a pretty print string for a Configuration object.
255         *
256         * @param conf Configuration object.
257         * @return prettyprint of the given Configuration object.
258         */
259        public static PrettyPrint prettyPrint(Configuration conf) {
260            Element root = new Element("configuration");
261            for (Map.Entry<String, String> entry : conf) {
262                Element property = new Element("property");
263                Element name = new Element("name");
264                name.setText(entry.getKey());
265                Element value = new Element("value");
266                value.setText(entry.getValue());
267                property.addContent(name);
268                property.addContent(value);
269                root.addContent(property);
270            }
271            return new PrettyPrint(root);
272        }
273    
274        /**
275         * Schema validation for a given xml. <p/>
276         *
277         * @param schema for validation
278         * @param xml to be validated
279         */
280        public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
281    
282            Validator validator = schema.newValidator();
283            validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
284        }
285    
286        /**
287         * Create schema object for the given xsd
288         *
289         * @param is inputstream to schema.
290         * @return the schema object.
291         */
292        public static Schema createSchema(InputStream is) {
293            SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
294            StreamSource src = new StreamSource(is);
295            try {
296                return factory.newSchema(src);
297            }
298            catch (SAXException e) {
299                throw new RuntimeException(e.getMessage(), e);
300            }
301        }
302    
303        public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
304            if (xmlData == null || xmlData.length() == 0) {
305                return;
306            }
307            javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
308            validateXml(schema, xmlData);
309        }
310    
311        /**
312         * Convert Properties to string
313         *
314         * @param props
315         * @return xml string
316         * @throws IOException
317         */
318        public static String writePropToString(Properties props) throws IOException {
319            try {
320                org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
321                org.w3c.dom.Element conf = doc.createElement("configuration");
322                doc.appendChild(conf);
323                conf.appendChild(doc.createTextNode("\n"));
324                for (Enumeration e = props.keys(); e.hasMoreElements();) {
325                    String name = (String) e.nextElement();
326                    Object object = props.get(name);
327                    String value;
328                    if (object instanceof String) {
329                        value = (String) object;
330                    }
331                    else {
332                        continue;
333                    }
334                    org.w3c.dom.Element propNode = doc.createElement("property");
335                    conf.appendChild(propNode);
336    
337                    org.w3c.dom.Element nameNode = doc.createElement("name");
338                    nameNode.appendChild(doc.createTextNode(name.trim()));
339                    propNode.appendChild(nameNode);
340    
341                    org.w3c.dom.Element valueNode = doc.createElement("value");
342                    valueNode.appendChild(doc.createTextNode(value.trim()));
343                    propNode.appendChild(valueNode);
344    
345                    conf.appendChild(doc.createTextNode("\n"));
346                }
347    
348                Source source = new DOMSource(doc);
349                StringWriter stringWriter = new StringWriter();
350                Result result = new StreamResult(stringWriter);
351                TransformerFactory factory = TransformerFactory.newInstance();
352                Transformer transformer = factory.newTransformer();
353                transformer.transform(source, result);
354    
355                return stringWriter.getBuffer().toString();
356            }
357            catch (Exception e) {
358                throw new IOException(e);
359            }
360        }
361    
362        /**
363         * Escape characters for text appearing as XML data, between tags.
364         * <P/>
365         * The following characters are replaced with corresponding character entities :
366         * '<' to '&lt';
367         * '>' to '&gt';
368         * '&' to '&amp;'
369         * '"' to '&quot;'
370         * "'" to "&#039;"
371         * <P/>
372         * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
373         */
374        public static String escapeCharsForXML(String aText) {
375            final StringBuilder result = new StringBuilder();
376            final StringCharacterIterator iterator = new StringCharacterIterator(aText);
377            char character = iterator.current();
378            while (character != CharacterIterator.DONE) {
379                if (character == '<') {
380                    result.append("&lt;");
381                }
382                else if (character == '>') {
383                    result.append("&gt;");
384                }
385                else if (character == '\"') {
386                    result.append("&quot;");
387                }
388                else if (character == '\'') {
389                    result.append("&#039;");
390                }
391                else if (character == '&') {
392                    result.append("&amp;");
393                }
394                else {
395                    // the char is not a special one
396                    // add it to the result as is
397                    result.append(character);
398                }
399                character = iterator.next();
400            }
401            return result.toString();
402        }
403    
404        public static Element getSLAElement(Element elem) {
405            Element eSla_1 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
406            Element eSla_2 = elem.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAMESPACE_URI_2));
407            Element eSla = (eSla_2 != null) ? eSla_2 : eSla_1;
408    
409            return eSla;
410        }
411    
412    }