::Go back to Oozie Documentation Index::


Oozie DistCp Action Extension

DistCp Action

The DistCp action uses Hadoop distributed copy to copy files from one cluster to another or within the same cluster.

IMPORTANT: The DistCp action may not work properly with all configurations (secure, insecure) in all versions of Hadoop.

Both Hadoop clusters have to be configured with proxyuser for the Oozie process as explained here on the Quick Start page.

Syntax:

<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.4">
    ...
    <action name="[NODE-NAME]">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode1}</name-node>
            <arg>${nameNode1}/path/to/input.txt</arg>
            <arg>${nameNode2}/path/to/output.txt</arg>
            </distcp>
        <ok to="[NODE-NAME]"/>
        <error to="[NODE-NAME]"/>
    </action>
    ...
</workflow-app>

The first arg indicates the input and the second arg indicates the output. In the above example, the input is on namenode1 and the output is on namenode2 .

IMPORTANT: If using the DistCp action between 2 secure clusters, the following property must be added to the configuration of the action:

<property>
    <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
    <value>${nameNode1},${nameNode2}</value>
</property>

Appendix, DistCp XML-Schema

AE.A Appendix A, DistCp XML-Schema

DistCp Action Schema Version 0.2

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:distcp="uri:oozie:distcp-action:0.2" elementFormDefault="qualified"
           targetNamespace="uri:oozie:distcp-action:0.2">
.
    <xs:element name="distcp" type="distcp:ACTION"/>
.
    <xs:complexType name="ACTION">
        <xs:sequence>
                <xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/>
                <xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/>
                <xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/>
                <xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
                <xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/>
                <xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="CONFIGURATION">
        <xs:sequence>
            <xs:element name="property" minOccurs="1" maxOccurs="unbounded">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
                        <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
                        <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="PREPARE">
        <xs:sequence>
            <xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/>
            <xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="DELETE">
        <xs:attribute name="path" type="xs:string" use="required"/>
    </xs:complexType>
.
    <xs:complexType name="MKDIR">
        <xs:attribute name="path" type="xs:string" use="required"/>
    </xs:complexType>
.
</xs:schema>

DistCp Action Schema Version 0.1

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:distcp="uri:oozie:distcp-action:0.1" elementFormDefault="qualified"
           targetNamespace="uri:oozie:distcp-action:0.1">
.
    <xs:element name="distcp" type="distcp:ACTION"/>
.
    <xs:complexType name="ACTION">
        <xs:sequence>
                <xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
                <xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
                <xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/>
                <xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
                <xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/>
                <xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="CONFIGURATION">
        <xs:sequence>
            <xs:element name="property" minOccurs="1" maxOccurs="unbounded">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
                        <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
                        <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="PREPARE">
        <xs:sequence>
            <xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/>
            <xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
.
    <xs:complexType name="DELETE">
        <xs:attribute name="path" type="xs:string" use="required"/>
    </xs:complexType>
.
    <xs:complexType name="MKDIR">
        <xs:attribute name="path" type="xs:string" use="required"/>
    </xs:complexType>
.
</xs:schema>

::Go back to Oozie Documentation Index::