Installing Hadoop on Ubuntu

Hadoop runs on Java, so install OpenJDK 8 first:

sudo apt install openjdk-8-jdk
Next, add the following environment variables to ~/.bashrc so they persist across sessions:

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$PATH:/usr/lib/jvm/java-8-openjdk-amd64/bin
export HADOOP_HOME=~/hadoop-3.2.3/
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_STREAMING=$HADOOP_HOME/share/hadoop/tools/lib/hadoop-streaming-3.2.3.jar
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
export PDSH_RCMD_TYPE=ssh
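Reload your shell so the variables take effect, then sanity-check them (a minimal check, assuming the exports above went into ~/.bashrc):

source ~/.bashrc
java -version        # should report an openjdk 1.8.0 build
echo $HADOOP_HOME    # should print the hadoop-3.2.3 path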
Install SSH, which Hadoop uses to start and manage its daemons:

sudo apt-get install ssh
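Before moving on, you can confirm the SSH server is running (the service is named ssh on Ubuntu):

sudo systemctl status ssh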
Extract the downloaded Hadoop 3.2.3 tarball into your home directory:

tar -zxvf ~/Downloads/hadoop-3.2.3.tar.gz
Now open hadoop-env.sh and set JAVA_HOME in it:

sudo nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
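If you prefer a non-interactive edit, appending the line works just as well; a sketch, assuming the same OpenJDK 8 path as above:

echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh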
Edit core-site.xml (sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml) and set the default filesystem:

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.proxyuser.dataflair.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.dataflair.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.server.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.server.groups</name>
    <value>*</value>
  </property>
</configuration>
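To confirm Hadoop picks the setting up, hdfs getconf can read it back (assuming the PATH exports above are active):

hdfs getconf -confKey fs.defaultFS    # should print hdfs://localhost:9000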
Edit hdfs-site.xml (sudo nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml); a replication factor of 1 is enough for a single-node setup:

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
Edit mapred-site.xml (sudo nano $HADOOP_HOME/etc/hadoop/mapred-site.xml) so MapReduce jobs run on YARN:

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
</configuration>
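If you want to see what actually ends up on the MapReduce classpath, the mapred tool can print it:

mapred classpath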
Finally, edit yarn-site.xml (sudo nano $HADOOP_HOME/etc/hadoop/yarn-site.xml):

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
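After editing the four files, it is worth checking that they are still well-formed XML; a quick sketch, assuming xmllint is installed (sudo apt install libxml2-utils):

xmllint --noout $HADOOP_CONF_DIR/core-site.xml $HADOOP_CONF_DIR/hdfs-site.xml $HADOOP_CONF_DIR/mapred-site.xml $HADOOP_CONF_DIR/yarn-site.xml

No output means all four parse cleanly.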
Set up passwordless SSH to localhost:

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys

Then confirm you can log in without being asked for a password:

ssh localhost
Format the NameNode (only needed once, before the first start):

hadoop-3.2.3/bin/hdfs namenode -format
Start all the Hadoop daemons (HDFS: NameNode, DataNode, SecondaryNameNode; YARN: ResourceManager, NodeManager):

start-all.sh
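A quick way to confirm the daemons came up is the JDK's jps tool, which lists running Java processes:

jps

You should see NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager alongside Jps itself.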
Once everything is up, the NameNode web UI is available in your browser at:

http://localhost:9870
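On a machine without a browser, curl can at least confirm the UI is serving:

curl -sI http://localhost:9870 | head -n 1    # expect an HTTP 200 response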
Create a home directory in HDFS and upload a test file:

hadoop fs -mkdir /user
hadoop fs -mkdir /user/arjun.gautam
touch demo.csv
hadoop fs -put demo.csv /user/arjun.gautam
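List the directory to verify the upload:

hadoop fs -ls /user/arjun.gautam    # demo.csv should appear here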
When you are finished, stop all the daemons:

stop-all.sh
