##########################
## Build the ubuntu docker image
Write a Dockerfile:
FROM ubuntu:16.04
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y \
        ssh rsync \
        net-tools vim git man manpages-dev wget curl
ENTRYPOINT service ssh restart && bash
> docker build -t hadoop-dev .
> docker run -t -d --name hadoop-dev -p 50070:50070 -p 8088:8088 -p 19888:19888 -p 8042:8042 hadoop-dev
(explicit host:container port mappings, so the web UIs listed at the end are reachable on localhost)
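All `#` commands below run as root inside the container; attach a shell with:
> docker exec -it hadoop-dev bash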
##########################
## optional user packages
# apt-get update
# apt-get upgrade -y
# apt-get install -y linux-source
# apt-get install -y automake autoconf
##########################
## hadoop build prerequisites
---------------------
-- oracle jdk 1.7
** Method 1: install via oracle-java7-installer
# apt-get install -y software-properties-common
# add-apt-repository ppa:webupd8team/java -y
# apt-get update
# apt-get install -y oracle-java7-installer
- If the install fails with a "download failed" error:
Download jdk-7u80-linux-x64.tar.gz manually from
http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html
> docker cp jdk-7u80-linux-x64.tar.gz hadoop-dev:/root/
# cp jdk-7u80-linux-x64.tar.gz /var/cache/oracle-jdk7-installer/
# apt-get install oracle-java7-installer
# vi /etc/bash.bashrc
export JAVA_HOME="/usr/lib/jvm/java-7-oracle"
** Method 2: manual installation
Download jdk-7u80-linux-x64.tar.gz manually from
http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html
> docker cp jdk-7u80-linux-x64.tar.gz hadoop-dev:/root/
# tar zxvf jdk-7u80-linux-x64.tar.gz
# mv jdk1.7.0_80 /usr/lib/jvm/
# update-alternatives --install /usr/bin/java java /usr/lib/jvm/jdk1.7.0_80/bin/java 1
# update-alternatives --install /usr/bin/javac javac /usr/lib/jvm/jdk1.7.0_80/bin/javac 1
# update-alternatives --install /usr/bin/javaws javaws /usr/lib/jvm/jdk1.7.0_80/bin/javaws 1
(
If multiple Java versions are installed, select/confirm the active one:
# update-alternatives --config java
# update-alternatives --config javac
# update-alternatives --config javaws
)
# vi /etc/bash.bashrc
export JAVA_HOME="/usr/lib/jvm/jdk1.7.0_80"
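Either way, a quick sanity check of what the shell now picks up (for 7u80 the first line of java -version should read java version "1.7.0_80"):
# source /etc/bash.bashrc
# java -version
# echo $JAVA_HOME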
---------------------
-- maven
# apt-get install -y maven
---------------------
-- native lib
# apt-get install -y build-essential autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev
---------------------
-- protocol buffer 2.5.0
# wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
# tar zxvf protobuf-2.5.0.tar.gz && cd protobuf-2.5.0
# ./configure --prefix=/usr
# make && make install
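Hadoop 2.7.x requires exactly protoc 2.5.0 (see BUILDING.txt); confirm it installed:
# protoc --version
libprotoc 2.5.0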
---------------------
-- snappy
# apt-get install -y snappy libsnappy-dev
---------------------
-- bzip2
# apt-get install -y bzip2 libbz2-dev
---------------------
-- jansson
# apt-get install -y libjansson-dev
---------------------
-- FUSE
# apt-get install -y fuse libfuse-dev
##########################
## compile hadoop
> docker cp hadoop-2.7.3-src.tar.gz hadoop-dev:/root/
# tar xvfz hadoop-2.7.3-src.tar.gz
# cd hadoop-2.7.3-src
# mvn package -Pdist,native -DskipTests -Dtar -Dmaven.javadoc.skip=true -Drequire.snappy -Drequire.openssl
** Built distribution tarball:
hadoop-2.7.3-src/hadoop-dist/target/hadoop-2.7.3.tar.gz
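A minimal deploy of the freshly built tarball under /root (assuming the source was unpacked in /root, matching the paths used below):
# tar xvfz hadoop-2.7.3-src/hadoop-dist/target/hadoop-2.7.3.tar.gz -C /root/
# cd /root/hadoop-2.7.3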
** Check the native libraries (from the unpacked hadoop-2.7.3 directory)
# bin/hadoop checknative -a
17/06/08 08:15:01 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
17/06/08 08:15:01 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
Native library checking:
hadoop: true /root/hadoop-2.7.3/lib/native/libhadoop.so.1.0.0
zlib: true /lib/x86_64-linux-gnu/libz.so.1
snappy: true /usr/lib/x86_64-linux-gnu/libsnappy.so.1
lz4: true revision:99
bzip2: true /lib/x86_64-linux-gnu/libbz2.so.1
openssl: true /usr/lib/x86_64-linux-gnu/libcrypto.so
##########################
## install hadoop - Pseudo-Distributed Operation
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html
-- etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-7-oracle
(set it to /usr/lib/jvm/jdk1.7.0_80 instead if the JDK was installed via Method 2)
export HADOOP_LOG_DIR=/root/logs
export HADOOP_PID_DIR=$HADOOP_LOG_DIR
# apt-get install -y ssh rsync
# service ssh restart
# ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# chmod 0600 ~/.ssh/authorized_keys
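Before starting the daemons, confirm that key-based login works (accept the host key on the first connect):
# ssh localhost exit
# ssh namenode exit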
-- /etc/hosts
127.0.0.1 namenode
127.0.0.1 secondarynamenode
127.0.0.1 datanode1
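getent reads /etc/hosts, so it verifies the aliases without involving DNS:
# getent hosts namenode
127.0.0.1       namenode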
-- etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://namenode:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/root/data</value>
  </property>
</configuration>
-- etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
-- for yarn : etc/hadoop/mapred-site.xml (copy from etc/hadoop/mapred-site.xml.template if it does not exist)
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!--property>
    <name>mapreduce.map.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>1024</value>
  </property-->
</configuration>
-- for yarn : etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!--property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>256</value>
  </property-->
</configuration>
-- for yarn : etc/hadoop/yarn-env.sh
export YARN_NODEMANAGER_HEAPSIZE=1024
-- Start
# bin/hdfs namenode -format
# sbin/start-dfs.sh
# sbin/start-yarn.sh
# sbin/mr-jobhistory-daemon.sh start historyserver
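If startup succeeded, jps (ships with the JDK) should list one JVM per daemon, roughly these names (PIDs omitted here):
# jps
NameNode
DataNode
SecondaryNameNode
ResourceManager
NodeManager
JobHistoryServer
Jps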
-- test
# bin/hdfs dfs -mkdir -p /user/root
# bin/hdfs dfs -put etc/hadoop input
# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
# bin/hdfs dfs -cat output/*
# bin/hdfs dfs -rm -r output
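Equivalently, copy the result out of HDFS first (as in the official single-cluster guide):
# bin/hdfs dfs -get output output
# cat output/*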
-- Stop
# sbin/stop-yarn.sh
# sbin/stop-dfs.sh
# sbin/mr-jobhistory-daemon.sh stop historyserver
-- web
NameNode - http://localhost:50070/
ResourceManager - http://localhost:8088/
MapReduce JobHistory - http://localhost:19888/
Node Manager - http://localhost:8042/
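A quick reachability check from the host (assumes the -p mappings from the docker run above; 200 means the UI is up):
> curl -s -o /dev/null -w "%{http_code}\n" http://localhost:50070/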