001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.oozie.util;
020
021import java.io.DataInput;
022import java.io.DataOutput;
023import java.io.IOException;
024import java.nio.charset.StandardCharsets;
025
/**
 * Works around the 64 KiB limit that {@link DataOutput#writeUTF(String)} imposes on a single string.
 * <p>
 * Wire format produced by {@link #writeString(DataOutput, String)}:
 * <ul>
 *   <li>short strings: exactly what {@code writeUTF(value)} produces;</li>
 *   <li>everything else: {@code writeUTF("V==1")}, followed by the UTF-8 byte count as an {@code int},
 *       followed by the raw UTF-8 bytes.</li>
 * </ul>
 * {@link #readString(DataInput)} understands both forms.
 */
public final class StringSerializationUtil {
    // Marker written in front of the length-prefixed byte form. A user string equal to the marker is
    // always written in the byte form so that the reader cannot confuse it with the marker itself.
    private static final String DATA_VERSION = "V==1";

    // Strings whose UTF-8 representation is longer than this are written in the byte form.
    private static final int CONVERSION_THRESHOLD = 60000;

    // Hard limit of DataOutput.writeUTF: the encoded length is stored in an unsigned 16-bit field.
    private static final int MAX_WRITE_UTF_BYTES = 65535;

    private StringSerializationUtil() {
    }

    /**
     * Writes {@code value} to {@code dOut}. The string is written with {@code writeUTF} when that is safe;
     * otherwise it is written as a marker, a length and the UTF-8 bytes. The byte form is used when
     * <ul>
     *   <li>the UTF-8 representation is longer than 60000 bytes,</li>
     *   <li>the modified UTF-8 encoding used by {@code writeUTF} would exceed 65535 bytes (possible for
     *       strings containing U+0000 or supplementary characters even below the 60000 byte threshold), or</li>
     *   <li>the string is equal to the internal marker and would otherwise be misread.</li>
     * </ul>
     * A {@code null} value is passed straight to {@code dOut.writeUTF}; what happens then is up to the
     * {@code DataOutput} implementation ({@code java.io.DataOutputStream} throws
     * {@code NullPointerException}).
     *
     * @param dOut the target output stream
     * @param value the string to write
     * @throws IOException in case of error during serialization
     */
    public static void writeString(DataOutput dOut, String value) throws IOException {
        if (value == null) {
            // Deliberately unchanged: delegate so the DataOutput implementation decides how to react.
            dOut.writeUTF(value);
            return;
        }

        byte[] data = value.getBytes(StandardCharsets.UTF_8);
        boolean useByteForm = data.length > CONVERSION_THRESHOLD
                || DATA_VERSION.equals(value)
                || modifiedUtf8Length(value) > MAX_WRITE_UTF_BYTES;

        if (useByteForm) {
            dOut.writeUTF(DATA_VERSION);
            dOut.writeInt(data.length);
            dOut.write(data);
        } else {
            dOut.writeUTF(value);
        }
    }

    /**
     * Reads a string previously written by {@link #writeString(DataOutput, String)}.
     * <p>
     * Note: a stream in which the literal string {@code "V==1"} was written with a plain {@code writeUTF}
     * cannot be told apart from the marker and will be interpreted as the byte form.
     *
     * @param dIn the input to read from
     * @return the deserialized string
     * @throws IOException if reading fails, the input ends prematurely, or the stored length is negative
     */
    public static String readString(DataInput dIn) throws IOException {
        String value = dIn.readUTF();
        if (!DATA_VERSION.equals(value)) {
            return value;
        }

        int length = dIn.readInt();
        if (length < 0) {
            // Corrupt input: report it as an I/O problem instead of a NegativeArraySizeException.
            throw new IOException("Invalid serialized string length: " + length);
        }
        byte[] data = new byte[length];
        dIn.readFully(data);
        return new String(data, StandardCharsets.UTF_8);
    }

    /**
     * Computes how many bytes {@code writeUTF} needs for the characters of {@code value} (excluding the
     * two byte length prefix): one byte for U+0001..U+007F, two bytes for U+0000 and U+0080..U+07FF,
     * three bytes for every other {@code char} (surrogates are encoded individually).
     */
    private static long modifiedUtf8Length(String value) {
        long encodedLength = 0;
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c >= 0x0001 && c <= 0x007F) {
                encodedLength += 1;
            } else if (c <= 0x07FF) {
                encodedLength += 2;
            } else {
                encodedLength += 3;
            }
        }
        return encodedLength;
    }
}