##########################

## create ubuntu docker image

Write a Dockerfile:



FROM ubuntu:16.04

RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y \
        ssh rsync \
        net-tools vim git man manpages-dev wget curl

ENTRYPOINT service ssh restart && bash



 

> docker build -t hadoop-dev .

> docker run -t -d --name hadoop-dev -p 50070:50070 -p 8088:8088 -p 19888:19888 -p 8042:8042 hadoop-dev
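
To get a shell inside the running container (using the container name from the run command above):

> docker exec -it hadoop-dev bash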





##########################

## user packages


# apt-get update

# apt-get upgrade -y

# apt-get install -y linux-source

# apt-get install -y automake autoconf





##########################

## hadoop prerequisite packages



---------------------

-- oracle jdk 1.7


** Method 1: install oracle-java7-installer

  # apt-get install -y software-properties-common

  # add-apt-repository ppa:webupd8team/java -y

  # apt-get update

  # apt-get install -y oracle-java7-installer


  - If the install fails with a "download failed" error:

    download jdk-7u80-linux-x64.tar.gz manually from

    http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html


    > docker cp jdk-7u80-linux-x64.tar.gz hadoop-dev:/root/

    # cp jdk-7u80-linux-x64.tar.gz /var/cache/oracle-jdk7-installer/

    # apt-get install oracle-java7-installer


  # vi /etc/bash.bashrc

  export JAVA_HOME="/usr/lib/jvm/java-7-oracle"

    


** Method 2: manual installation

  download jdk-7u80-linux-x64.tar.gz manually from

  http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html


  > docker cp jdk-7u80-linux-x64.tar.gz hadoop-dev:/root/


  # tar zxvf jdk-7u80-linux-x64.tar.gz
  # mkdir -p /usr/lib/jvm
  # mv jdk1.7.0_80 /usr/lib/jvm/


  # update-alternatives --install /usr/bin/java java /usr/lib/jvm/jdk1.7.0_80/bin/java 1

  # update-alternatives --install /usr/bin/javac javac /usr/lib/jvm/jdk1.7.0_80/bin/javac 1

  # update-alternatives --install /usr/bin/javaws javaws /usr/lib/jvm/jdk1.7.0_80/bin/javaws 1

  (

    If multiple Java installations exist, select/verify the active one:

    # update-alternatives --config java

    # update-alternatives --config javac

    # update-alternatives --config javaws

  )


  # vi /etc/bash.bashrc

  export JAVA_HOME="/usr/lib/jvm/jdk1.7.0_80"
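
  To confirm the JDK is picked up (applies to either method; JAVA_HOME should match the install path used):

  # source /etc/bash.bashrc
  # echo $JAVA_HOME
  # java -version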



---------------------

-- maven

# apt-get install -y maven
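
A quick sanity check that Maven and the JDK it runs on are in place (building Hadoop 2.7 needs Maven 3 and JDK 1.7+):

# mvn -version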


---------------------

-- native lib

# apt-get install -y build-essential autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev


---------------------

-- protocol buffer 2.5.0

# wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz

# tar xvfz protobuf-2.5.0.tar.gz

# cd protobuf-2.5.0

# ./configure --prefix=/usr

# make; make install
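
The Hadoop build checks the protoc version, so confirm it reports 2.5.0:

# protoc --version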


---------------------

-- snappy

# apt-get install -y snappy libsnappy-dev


---------------------

-- bzip2

# apt-get install -y bzip2 libbz2-dev


---------------------

-- jansson

# apt-get install -y libjansson-dev


---------------------

-- FUSE

# apt-get install -y fuse libfuse-dev





##########################

## compile hadoop


> docker cp hadoop-2.7.3-src.tar.gz hadoop-dev:/root/

# tar xvfz hadoop-2.7.3-src.tar.gz

# cd hadoop-2.7.3-src

# mvn package -Pdist,native -DskipTests -Dtar -Dmaven.javadoc.skip=true -Drequire.snappy -Drequire.openssl



** built distribution tarball

  hadoop-2.7.3-src/hadoop-dist/target/hadoop-2.7.3.tar.gz
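
  To try the build, extract the tarball into the home directory (the checknative example below assumes this layout):

  # tar xvfz ~/hadoop-2.7.3-src/hadoop-dist/target/hadoop-2.7.3.tar.gz -C ~/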


** check native libraries

  # cd ~/hadoop-2.7.3/bin
  # ./hadoop checknative -a

  17/06/08 08:15:01 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native

  17/06/08 08:15:01 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library

  Native library checking:

  hadoop:  true /root/hadoop-2.7.3/lib/native/libhadoop.so.1.0.0

  zlib:    true /lib/x86_64-linux-gnu/libz.so.1

  snappy:  true /usr/lib/x86_64-linux-gnu/libsnappy.so.1

  lz4:     true revision:99

  bzip2:   true /lib/x86_64-linux-gnu/libbz2.so.1

  openssl: true /usr/lib/x86_64-linux-gnu/libcrypto.so



  

  

##########################

## install hadoop - Pseudo-Distributed Operation


http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html


-- etc/hadoop/hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-7-oracle

export HADOOP_LOG_DIR=/root/logs

export HADOOP_PID_DIR=$HADOOP_LOG_DIR



# apt-get install -y ssh rsync

# service ssh restart

# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa

# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

# chmod 0600 ~/.ssh/authorized_keys
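
A quick check that passwordless ssh works (the -o flag just skips the first-connection host-key prompt):

# ssh -o StrictHostKeyChecking=no localhost exit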


-- /etc/hosts

127.0.0.1   namenode
127.0.0.1   secondarynamenode
127.0.0.1   datanode1



-- etc/hadoop/core-site.xml

<configuration>

   <property>

     <name>fs.defaultFS</name>

     <value>hdfs://namenode:9000</value>

   </property>

   <property>

     <name>hadoop.tmp.dir</name>

     <value>/root/data</value>

   </property>

 </configuration>


-- etc/hadoop/hdfs-site.xml

<configuration>

    <property>

        <name>dfs.replication</name>

        <value>1</value>

    </property>

    <property>

        <name>dfs.webhdfs.enabled</name>

        <value>true</value>

    </property>

</configuration>




-- for yarn : etc/hadoop/mapred-site.xml

<configuration>

  <property>

    <name>mapreduce.framework.name</name>

    <value>yarn</value>

  </property>

  <!--property>

    <name>mapreduce.map.memory.mb</name>

    <value>1024</value>

  </property>

  <property>

    <name>mapreduce.reduce.memory.mb</name>

    <value>1024</value>

  </property--> 

</configuration>



-- for yarn : etc/hadoop/yarn-site.xml

<configuration>

  <property>

    <name>yarn.nodemanager.aux-services</name>

    <value>mapreduce_shuffle</value>

  </property>

  <!--property>

    <name>yarn.nodemanager.resource.memory-mb</name>

    <value>8192</value>

  </property>

  <property>

    <name>yarn.nodemanager.resource.cpu-vcores</name>

    <value>8</value>

  </property> 

  <property>

    <name>yarn.scheduler.minimum-allocation-mb</name>

    <value>1024</value>

  </property>

  <property>

    <name>yarn.scheduler.maximum-allocation-mb</name>

    <value>8192</value>

  </property>

  <property>

    <name>yarn.app.mapreduce.am.resource.mb</name>

    <value>256</value>

  </property-->

</configuration>



--for yarn : etc/hadoop/yarn-env.sh

export YARN_NODEMANAGER_HEAPSIZE=1024




-- Start

# bin/hdfs namenode -format

# sbin/start-dfs.sh

# sbin/start-yarn.sh

# sbin/mr-jobhistory-daemon.sh start historyserver
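
After start-up, jps should list the running daemons (NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, JobHistoryServer):

# jps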



-- test

# bin/hdfs dfs -mkdir -p /user/root

# bin/hdfs dfs -put etc/hadoop input

# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'

# bin/hdfs dfs -cat output/*

# bin/hdfs dfs -rm -r output
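
Since the example job runs on YARN, submitted applications can also be listed from the command line (including finished ones):

# bin/yarn application -list -appStates ALL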



-- Stop

# sbin/stop-yarn.sh

# sbin/stop-dfs.sh

# sbin/mr-jobhistory-daemon.sh stop historyserver




-- web

NameNode - http://localhost:50070/

ResourceManager - http://localhost:8088/

MapReduce JobHistory - http://localhost:19888/

Node Manager - http://localhost:8042/
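
With dfs.webhdfs.enabled set above and port 50070 published as in the docker run example, HDFS can also be queried over WebHDFS from the host:

> curl "http://localhost:50070/webhdfs/v1/user/root?op=LISTSTATUS"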