001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     * 
010     *      http://www.apache.org/licenses/LICENSE-2.0
011     * 
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.oozie.util;
019    
020    import java.io.ByteArrayInputStream;
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.io.StringReader;
024    import java.io.StringWriter;
025    import java.text.CharacterIterator;
026    import java.text.StringCharacterIterator;
027    import java.util.Enumeration;
028    import java.util.Iterator;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Properties;
032    
033    import javax.xml.XMLConstants;
034    import javax.xml.parsers.DocumentBuilderFactory;
035    import javax.xml.transform.Result;
036    import javax.xml.transform.Source;
037    import javax.xml.transform.Transformer;
038    import javax.xml.transform.TransformerFactory;
039    import javax.xml.transform.dom.DOMSource;
040    import javax.xml.transform.stream.StreamResult;
041    import javax.xml.transform.stream.StreamSource;
042    import javax.xml.validation.Schema;
043    import javax.xml.validation.SchemaFactory;
044    import javax.xml.validation.Validator;
045    
046    import org.apache.hadoop.conf.Configuration;
047    import org.apache.oozie.service.SchemaService;
048    import org.apache.oozie.service.Services;
049    import org.apache.oozie.service.SchemaService.SchemaName;
050    import org.jdom.Comment;
051    import org.jdom.Document;
052    import org.jdom.Element;
053    import org.jdom.JDOMException;
054    import org.jdom.input.SAXBuilder;
055    import org.jdom.output.Format;
056    import org.jdom.output.XMLOutputter;
057    import org.xml.sax.EntityResolver;
058    import org.xml.sax.InputSource;
059    import org.xml.sax.SAXException;
060    
061    /**
062     * XML utility methods.
063     */
064    public class XmlUtils {
065        public static final String SLA_NAME_SPACE_URI = "uri:oozie:sla:0.1";
066    
067        private static class NoExternalEntityEntityResolver implements EntityResolver {
068    
069            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
070                return new InputSource(new ByteArrayInputStream(new byte[0]));
071            }
072    
073        }
074    
075        private static SAXBuilder createSAXBuilder() {
076            SAXBuilder saxBuilder = new SAXBuilder();
077    
078            //THIS IS NOT WORKING
079            //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
080    
081            //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
082            saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
083            return saxBuilder;
084        }
085    
086        /**
087         * Remove comments from any Xml String.
088         *
089         * @param xmlStr XML string to remove comments.
090         * @return String after removing comments.
091         * @throws JDOMException thrown if an error happend while XML parsing.
092         */
093        public static String removeComments(String xmlStr) throws JDOMException {
094            if (xmlStr == null) {
095                return null;
096            }
097            try {
098                SAXBuilder saxBuilder = createSAXBuilder();
099                Document document = saxBuilder.build(new StringReader(xmlStr));
100                removeComments(document);
101                return prettyPrint(document.getRootElement()).toString();
102            }
103            catch (IOException ex) {
104                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
105            }
106        }
107    
108        private static void removeComments(List l) {
109            for (Iterator i = l.iterator(); i.hasNext();) {
110                Object node = i.next();
111                if (node instanceof Comment) {
112                    i.remove();
113                }
114                else {
115                    if (node instanceof Element) {
116                        removeComments(((Element) node).getContent());
117                    }
118                }
119            }
120        }
121    
122        private static void removeComments(Document doc) {
123            removeComments(doc.getContent());
124        }
125    
126        /**
127         * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
128         *
129         * @param xmlStr XML string to parse.
130         * @return JDOM element for the parsed XML string.
131         * @throws JDOMException thrown if an error happend while XML parsing.
132         */
133        public static Element parseXml(String xmlStr) throws JDOMException {
134            ParamChecker.notNull(xmlStr, "xmlStr");
135            try {
136                SAXBuilder saxBuilder = createSAXBuilder();
137                Document document = saxBuilder.build(new StringReader(xmlStr));
138                return document.getRootElement();
139            }
140            catch (IOException ex) {
141                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
142            }
143        }
144    
145        /**
146         * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
147         *
148         * @param is inputstream to parse.
149         * @return JDOM element for the parsed XML string.
150         * @throws JDOMException thrown if an error happend while XML parsing.
151         * @throws IOException thrown if an IO error occurred.
152         */
153        public static Element parseXml(InputStream is) throws JDOMException, IOException {
154            ParamChecker.notNull(is, "is");
155            SAXBuilder saxBuilder = createSAXBuilder();
156            Document document = saxBuilder.build(is);
157            return document.getRootElement();
158        }
159    
160        /**
161         * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
162         * document.
163         *
164         * @param filePath path of the XML document.
165         * @param attributeName attribute to retrieve value for.
166         * @return value of the specified attribute.
167         */
168        public static String getRootAttribute(String filePath, String attributeName) {
169            ParamChecker.notNull(filePath, "filePath");
170            ParamChecker.notNull(attributeName, "attributeName");
171            SAXBuilder saxBuilder = createSAXBuilder();
172            try {
173                Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
174                return doc.getRootElement().getAttributeValue(attributeName);
175            }
176            catch (JDOMException e) {
177                throw new RuntimeException();
178            }
179            catch (IOException e) {
180                throw new RuntimeException();
181            }
182        }
183    
184        /**
185         * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
186         * {@link #toString} method is invoked.
187         */
188        public static class PrettyPrint {
189            private String str;
190            private Element element;
191    
192            private PrettyPrint(String str) {
193                this.str = str;
194            }
195    
196            private PrettyPrint(Element element) {
197                this.element = ParamChecker.notNull(element, "element");
198            }
199    
200            /**
201             * Return the pretty print representation of an XML document.
202             *
203             * @return the pretty print representation of an XML document.
204             */
205            @Override
206            public String toString() {
207                if (str != null) {
208                    return str;
209                }
210                else {
211                    XMLOutputter outputter = new XMLOutputter();
212                    StringWriter stringWriter = new StringWriter();
213                    outputter.setFormat(Format.getPrettyFormat());
214                    try {
215                        outputter.output(element, stringWriter);
216                    }
217                    catch (Exception ex) {
218                        throw new RuntimeException(ex);
219                    }
220                    return stringWriter.toString();
221                }
222            }
223        }
224    
225        /**
226         * Return a pretty print string for a JDOM Element.
227         *
228         * @param element JDOM element.
229         * @return pretty print of the given JDOM Element.
230         */
231        public static PrettyPrint prettyPrint(Element element) {
232            return new PrettyPrint(element);
233    
234        }
235    
236        /**
237         * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
238         * string.
239         *
240         * @param xmlStr XML string.
241         * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
242         */
243        public static PrettyPrint prettyPrint(String xmlStr) {
244            try {
245                return new PrettyPrint(parseXml(xmlStr));
246            }
247            catch (Exception e) {
248                return new PrettyPrint(xmlStr);
249            }
250        }
251    
252        /**
253         * Return a pretty print string for a Configuration object.
254         *
255         * @param conf Configuration object.
256         * @return prettyprint of the given Configuration object.
257         */
258        public static PrettyPrint prettyPrint(Configuration conf) {
259            Element root = new Element("configuration");
260            for (Map.Entry<String, String> entry : conf) {
261                Element property = new Element("property");
262                Element name = new Element("name");
263                name.setText(entry.getKey());
264                Element value = new Element("value");
265                value.setText(entry.getValue());
266                property.addContent(name);
267                property.addContent(value);
268                root.addContent(property);
269            }
270            return new PrettyPrint(root);
271        }
272    
273        /**
274         * Schema validation for a given xml. <p/>
275         *
276         * @param schema for validation
277         * @param xml to be validated
278         */
279        public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
280    
281            Validator validator = schema.newValidator();
282            validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
283        }
284    
285        /**
286         * Create schema object for the given xsd
287         *
288         * @param is inputstream to schema.
289         * @return the schema object.
290         */
291        public static Schema createSchema(InputStream is) {
292            SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
293            StreamSource src = new StreamSource(is);
294            try {
295                return factory.newSchema(src);
296            }
297            catch (SAXException e) {
298                throw new RuntimeException(e.getMessage(), e);
299            }
300        }
301    
302        public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
303            if (xmlData == null || xmlData.length() == 0) {
304                return;
305            }
306            javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
307            validateXml(schema, xmlData);
308        }
309    
310        /**
311         * Convert Properties to string
312         *
313         * @param props
314         * @return xml string
315         * @throws IOException
316         */
317        public static String writePropToString(Properties props) throws IOException {
318            try {
319                org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
320                org.w3c.dom.Element conf = doc.createElement("configuration");
321                doc.appendChild(conf);
322                conf.appendChild(doc.createTextNode("\n"));
323                for (Enumeration e = props.keys(); e.hasMoreElements();) {
324                    String name = (String) e.nextElement();
325                    Object object = props.get(name);
326                    String value;
327                    if (object instanceof String) {
328                        value = (String) object;
329                    }
330                    else {
331                        continue;
332                    }
333                    org.w3c.dom.Element propNode = doc.createElement("property");
334                    conf.appendChild(propNode);
335    
336                    org.w3c.dom.Element nameNode = doc.createElement("name");
337                    nameNode.appendChild(doc.createTextNode(name.trim()));
338                    propNode.appendChild(nameNode);
339    
340                    org.w3c.dom.Element valueNode = doc.createElement("value");
341                    valueNode.appendChild(doc.createTextNode(value.trim()));
342                    propNode.appendChild(valueNode);
343    
344                    conf.appendChild(doc.createTextNode("\n"));
345                }
346    
347                Source source = new DOMSource(doc);
348                StringWriter stringWriter = new StringWriter();
349                Result result = new StreamResult(stringWriter);
350                TransformerFactory factory = TransformerFactory.newInstance();
351                Transformer transformer = factory.newTransformer();
352                transformer.transform(source, result);
353    
354                return stringWriter.getBuffer().toString();
355            }
356            catch (Exception e) {
357                throw new IOException(e);
358            }
359        }
360    
361        /**
362         * Escape characters for text appearing as XML data, between tags.
363         * <P/>
364         * The following characters are replaced with corresponding character entities :
365         * '<' to '&lt';
366         * '>' to '&gt';
367         * '&' to '&amp;'
368         * '"' to '&quot;'
369         * "'" to "&#039;"
370         * <P/>
371         * Note that JSTL's {@code <c:out>} escapes the exact same set of characters as this method.
372         */
373        public static String escapeCharsForXML(String aText) {
374            final StringBuilder result = new StringBuilder();
375            final StringCharacterIterator iterator = new StringCharacterIterator(aText);
376            char character = iterator.current();
377            while (character != CharacterIterator.DONE) {
378                if (character == '<') {
379                    result.append("&lt;");
380                }
381                else if (character == '>') {
382                    result.append("&gt;");
383                }
384                else if (character == '\"') {
385                    result.append("&quot;");
386                }
387                else if (character == '\'') {
388                    result.append("&#039;");
389                }
390                else if (character == '&') {
391                    result.append("&amp;");
392                }
393                else {
394                    // the char is not a special one
395                    // add it to the result as is
396                    result.append(character);
397                }
398                character = iterator.next();
399            }
400            return result.toString();
401        }
402    
403    }