Wednesday, July 10, 2013

HBase connection - Java API

HBase Connection using Java API


HBase is a column-oriented database: it stores its contents by column rather than by row. Instead of retrieving a record or row at a time, an entire column can be retrieved, which is powerful and efficient because data analytics is usually concerned with only one field or column of a record. Access becomes much faster, and much more relevant data can be extracted from the database in a shorter period of time.

Sample code for connecting to HBase is given below.

package sample;

import java.io.IOException;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;



public class HbaseTest {

    private static Configuration conf = null;

    /**
     * Initialization
     */
    static {
        //String hbaseZookeeperQuorum = "bdcltvmserv04:2181";
        String hbaseZookeeperQuorum = "quorum:2181";
        String hbaseZookeeperClientPort = "2181";
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum",
                hbaseZookeeperQuorum);
        conf.set("hbase.zookeeper.property.clientPort",
                hbaseZookeeperClientPort);
        //conf.set("hbase.master", "bdcltvmserv08:60000");
    }

    /**
     * Create a table
     */
    public static void creatTable(String tableName, String[] familys)
            throws Exception {
   
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (admin.tableExists(tableName)) {
            System.out.println("table already exists!");
        } else {
            HTableDescriptor tableDesc = new HTableDescriptor(tableName);
            for (int i = 0; i < familys.length; i++) {
                tableDesc.addFamily(new HColumnDescriptor(familys[i]));
            }
            admin.createTable(tableDesc);
            System.out.println("create table " + tableName + " ok.");
        }
    }

    /**
     * Delete a table
     */
    public static void deleteTable(String tableName) throws Exception {
        try {
            HBaseAdmin admin = new HBaseAdmin(conf);
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
            System.out.println("delete table " + tableName + " ok.");
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        }
    }

    /**
     * Put (or insert) a row
     */
    public static void addRecord(String tableName, String rowKey,
            String family, String qualifier, String value) throws Exception {
        try {
            HTable table = new HTable(conf, tableName);
            Put put = new Put(Bytes.toBytes(rowKey));
            put.add(Bytes.toBytes(family), Bytes.toBytes(qualifier),
                    Bytes.toBytes(value));
            table.put(put);
            System.out.println("insert recored " + rowKey + " to table "
                    + tableName + " ok.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Delete a row
     */
    public static void delRecord(String tableName, String rowKey)
            throws IOException {
        HTable table = new HTable(conf, tableName);
        List<Delete> list = new ArrayList<Delete>();
        Delete del = new Delete(rowKey.getBytes());
        list.add(del);
        table.delete(list);
        System.out.println("del recored " + rowKey + " ok.");
    }

    /**
     * Get a row
     */
    public static void getOneRecord(String tableName, String rowKey)
            throws IOException {
        HTable table = new HTable(conf, tableName);
        Get get = new Get(rowKey.getBytes());
        Result rs = table.get(get);
        for (KeyValue kv : rs.raw()) {
            System.out.print(new String(kv.getRow()) + " ");
            System.out.print(new String(kv.getFamily()) + ":");
            System.out.print(new String(kv.getQualifier()) + " ");
            System.out.print(kv.getTimestamp() + " ");
            System.out.println(new String(kv.getValue()));
        }
    }

    /**
     * Scan (or list) a table
     */
    public static void getAllRecord(String tableName) {
        try {
            HTable table = new HTable(conf, tableName);
            Scan s = new Scan();
            ResultScanner ss = table.getScanner(s);
            for (Result r : ss) {
                for (KeyValue kv : r.raw()) {
                    System.out.print(new String(kv.getRow()) + " ");
                    System.out.print(new String(kv.getFamily()) + ":");
                    System.out.print(new String(kv.getQualifier()) + " ");
                    System.out.print(kv.getTimestamp() + " ");
                    System.out.println(new String(kv.getValue()));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] agrs) {
        try {

            String tablename = "employee";
            String[] familys = {"data"};
            HbaseTest.creatTable(tablename, familys);

            // add record zkb
            HbaseTest.addRecord(tablename, "row1", "data", "city", "Banglore");
            HbaseTest.addRecord(tablename, "row2", "data", "city", "Noida");
            HbaseTest.addRecord(tablename, "row3", "data", "city", "Banglore");
            HbaseTest.addRecord(tablename, "row4", "data", "city", "Noida");
            HbaseTest.addRecord(tablename, "row5", "data", "city", "Noida");
            HbaseTest.addRecord(tablename, "row6", "data", "city", "Trivandrum");
            HbaseTest.addRecord(tablename, "row7", "data", "city", "Ernakulam");
            HbaseTest.addRecord(tablename, "row8", "data", "city", "Ernakulam");
            HbaseTest.addRecord(tablename, "row9", "data", "city", "Trivandrum");
            HbaseTest.addRecord(tablename, "row10", "data", "city", "Trivandrum");
            HbaseTest.addRecord(tablename, "row11", "data", "city", "Ernakulam");
            System.out.println("===========show all record========");
            HbaseTest.getAllRecord(tablename);
           
        System.out.println("===========delete table========");
            HbaseTest.deleteTable(tablename);
           
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


Output


HBase Installation in Hadoop Cluster



Installing Hbase



HBase is a distributed, non-relational, open-source database. It is a key-value store that runs on top of the Hadoop architecture and the HDFS file system.

Copy the HBase tarball to a particular location and untar it. We are using hbase-0.94.x.tar.gz.
    $ cd /home/hduser/utilities
    $ tar -xzvf hbase-0.94.x.tar.gz
Edit /home/hduser/utilities/hbase/conf/hbase-env.sh and define $JAVA_HOME.
Set HBASE_MANAGES_ZK to 'true' to have HBase start and stop its own ZooKeeper instance. If it is set to 'false', HBase does not manage ZooKeeper itself and expects an externally managed ZooKeeper ensemble.
    $ nano /home/hduser/utilities/hbase/conf/hbase-env.sh
    export HBASE_MANAGES_ZK=true


Edit the hbase-site.xml



Notes:

- The value of 'hbase.rootdir' contains the hostname and port number of the system where the NameNode is running. (The port number should be the same as the one used for the NameNode in core-site.xml.)
We have to create a folder in HDFS to store the HBase data (this folder should be mentioned in the 'hbase.rootdir' value):
    $ hadoop fs -ls /
    $ hadoop fs -mkdir /hbase

- For a multi-node HBase installation, the property 'hbase.zookeeper.quorum' is important. It identifies the systems on which ZooKeeper is installed.

To Start Hbase daemons


    $ /home/hduser/utilities/hbase/bin/start-hbase.sh




To stop Hbase daemons

    $ /home/hduser/utilities/hbase/bin/stop-hbase.sh
The web UI of HBase is available at the following address. The default UI port of HBase is 60010.
http://<IP-address-of-the-system>:60010