HMaster、HRegionServer、ZooKeeper、HRegion、ROOT表、META表、HFile、HStore、MemStore、BlockCache
// Minimal HBase client flow: create table "test" with family "data",
// write one cell, read it back with Get, then scan the table.
// Fixes vs. the original snippet: columnDescriptor is now declared before use,
// duplicate local declarations (row/colfam/col) are removed, and the
// scanner/table/admin are closed as the note below requires.
Configuration conf = HBaseConfiguration.create();
HBaseAdmin admin = new HBaseAdmin(conf);
TableName tableName = TableName.valueOf("test");
HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
HColumnDescriptor columnDescriptor = new HColumnDescriptor("data"); // was referenced but never declared
tableDescriptor.addFamily(columnDescriptor);
admin.createTable(tableDescriptor);
HTable table = new HTable(conf, tableName);
// Write: row1, cell data:1 = "value1".
byte[] row = Bytes.toBytes("row1");
Put put = new Put(row);
byte[] colfam = Bytes.toBytes("data");
byte[] col = Bytes.toBytes(String.valueOf(1));
byte[] val = Bytes.toBytes("value1");
put.add(colfam, col, val);
table.put(put);
// Read back the same cell with a point Get (reuses row/colfam/col from above).
Get get = new Get(row);
Result result = table.get(get);
System.out.println("get value is " + Bytes.toString(result.getValue(colfam, col)));
// Full-table scan; row k is expected to hold its cell under qualifier k.
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
int i = 0;
for (Result scanresult : scanner) {
byte[] scancol = Bytes.toBytes(String.valueOf(++i));
System.out.println("scan value is " + Bytes.toString(scanresult.getValue(colfam, scancol)));
}
// Release client resources, innermost first (see note below).
scanner.close();
table.close();
admin.close();
注意:HBaseAdmin、HTable、ResultScanner 对象使用完后都要调用 close()。
package ExampleClient;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * End-to-end HBase client demo: creates the "test" table with a single
 * "data" column family, inserts three rows, fetches one row back with a
 * point Get, then scans the whole table. All client resources are
 * released in nested finally blocks, innermost first.
 */
public class ExampleClient {

    public static void main(String[] args)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            // Define and create the schema: table "test", one family "data".
            TableName tableName = TableName.valueOf("test");
            HTableDescriptor schema = new HTableDescriptor(tableName);
            schema.addFamily(new HColumnDescriptor("data"));
            admin.createTable(schema);

            HTable table = new HTable(conf, tableName);
            try {
                byte[] family = Bytes.toBytes("data");

                // Write rows row1..row3; row n carries one cell data:<n> = "value<n>".
                int n = 1;
                while (n <= 3) {
                    Put put = new Put(Bytes.toBytes("row" + n));
                    put.add(family, Bytes.toBytes(String.valueOf(n)), Bytes.toBytes("value" + n));
                    table.put(put);
                    ++n;
                }

                // Point read: fetch row1 and print its data:1 cell.
                Get get = new Get(Bytes.toBytes("row1"));
                Result single = table.get(get);
                byte[] qualifier = Bytes.toBytes(String.valueOf(1));
                System.out.println("get result is " + Bytes.toString(single.getValue(family, qualifier)));

                // Full-table scan: the k-th row returned holds its cell under qualifier k.
                ResultScanner scanner = table.getScanner(new Scan());
                try {
                    int seen = 0;
                    for (Result scanned : scanner) {
                        byte[] scanQualifier = Bytes.toBytes(String.valueOf(++seen));
                        System.out.println("scan result is " + Bytes.toString(scanned.getValue(family, scanQualifier)));
                    }
                } finally {
                    scanner.close();
                }
            } finally {
                table.close();
            }
        } finally {
            admin.close();
        }
    }
}
根节点和枝节点分别记录每个叶子节点的最小值,并用一个指针指向叶子节点。
B+树对读友好。叶子节点里每个键值都指向真正的数据块,每个叶子节点都有前指针和后指针,这是为了做范围查询时,叶子节点间可以直接跳转。
B+树对写不友好。最大的性能问题是会产生大量的随机IO:随着新数据的插入,叶子节点会慢慢分裂,逻辑上连续的叶子节点在物理上往往不连续,甚至分离得很远,因此做范围查询时,会产生大量的随机读IO。
关系数据库中常用B+树组织数据。如上图所示,内部节点已经存满,再插入一个新记录时,需要在B+树中插入一个新的内部节点,再链到B+树中。这里的问题是新的内部节点在磁盘上可能存放在很远的地方,在顺序扫描数据时,不得不seek磁盘。
LSM树本质上就是在读写之间取得平衡,和B+树相比,它牺牲了部分读性能,用来大幅提高写性能。
它的原理是把一棵大树拆分成N棵小树:数据首先写入到内存中(内存没有寻道速度的问题,随机写的性能得到大幅提升),在内存中构建一棵有序小树;随着小树越来越大,内存中的小树会flush到磁盘上。读取时,由于不知道数据在哪棵小树上,因此必须遍历所有的小树,但每棵小树内部的数据是有序的。