《快学BigData》--Hadoop总结（F）（39）

小徐

发布于 2019-08-05 14:43:14

2770

发布于 2019-08-05 14:43:14

文章被收录于专栏：Greenplum

Hadoop总结 - - - - - - - - - - - - - - - - - - - - - - - - - - - - 210

概述 - - - - - - - - - - - - - - - - - - - - - - - - - - - - 211

CDH - - - - - - - - - - - - - - - - - - - - - - - - - - - - 211

安装Hadoop2.6.4 非Zookeeper集群版 - - - - - - - - - - - - - - - 211

安装Hadoop2.6.4 Zookeeper集群版 - - - - - - - - - - - - - - - 216

MapReduce整体的流程详解 - - - - - - - - - - - - - - - - - - - - 225

Hadoop HDFS 系统详解 - - - - - - - - - - - - - - - - - - - - - 226

JAVA 操作HDFS - - - - - - - - - - - - - - - - - - - - - - - - 241

Hadoop MapReduce 实例 - - - - - - - - - - - - - - - - - - - - 248

Hadoop 其他总结 - - - - - - - - - - - - - - - - - - - - - - - - 259

Hadoop 优化总结 - - - - - - - - - - - - - - - - - - - - - - - - 259

JAVA操作HDFS

链接：http://pan.baidu.com/s/1qX9krmO 密码：4w2p。请先配置 Windows 开发插件；如果无法下载，请联系作者。

1-1）、环境的准备

把 Hadoop 安装包中 share 目录下的 jar 包导入到项目中。

1-2）、代码实现

也可以直接在 Java 项目中导入 Hadoop 安装包 share 目录下的 jar 包。

A）、文件的增删改查

package hdfs;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.BlockLocation;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.LocatedFileStatus;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.fs.RemoteIterator;

import org.junit.Before;

import org.junit.Test;

/**

* HDFS属性的操作

public class HdfsClient {

FileSystem fs = null;

@Before

public void init() throws Exception {

// 加入winutils.exe工具

System.setProperty("hadoop.home.dir",

"E:\\winutils-hadoop-2.6.4\\hadoop-2.6.4");

// 构造一个配置参数对象，设置一个参数：我们要访问的hdfs的URI

// 从而FileSystem.get()方法就知道应该是去构造一个访问hdfs文件系统的客户端，以及hdfs的访问地址

// new Configuration();的时候，它就会去加载jar包中的hdfs-default.xml

// 然后再加载classpath下的hdfs-site.xml

Configuration conf = new Configuration();

conf.set("fs.defaultFS", "hdfs://hadoop1:9000");

/**

* 参数优先级： 1、客户端代码中设置的值 2、classpath下的用户自定义配置文件 3、然后是服务器的默认配置

conf.set("dfs.replication", "3");

// 获取一个hdfs的访问客户端，根据参数，这个实例应该是DistributedFileSystem的实例

// fs = FileSystem.get(conf);

// 如果这样去获取，那conf里面就可以不要配"fs.defaultFS"参数，而且，这个客户端的身份标识已经是hadoop用户

fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "root");

}

/**

* 往hdfs上传文件

* @throws Exception

@Test

public void testAddFileToHdfs() throws Exception {

// 要上传的文件所在的本地路径

Path src = new Path("D:\\hadoop\\AverageScoreInput\\china.txt");

// 要上传到hdfs的目标路径

Path dst = new Path("/updateFile/a.text");

fs.copyFromLocalFile(src, dst);

fs.close();

}

/**

* 从hdfs中复制文件到本地文件系统

* @throws IOException

* @throws IllegalArgumentException

@Test

public void testDownloadFileToLocal() throws IllegalArgumentException,

IOException {

fs.copyToLocalFile(new Path("/updateFile/a.text"), new Path("d:/"));

fs.close();

}

/**

* 删除与修改文件夹的名字

* @throws IllegalArgumentException

* @throws IOException

@Test

public void testMkdirAndDeleteAndRename() throws IllegalArgumentException,

IOException {

// 创建目录

fs.mkdirs(new Path("/mkdirDirs"));

// 删除文件夹，如果是非空文件夹，参数2必须给值true

fs.delete(new Path("/updateFile"), true);

// 重命名文件或文件夹

fs.rename(new Path("/mkdirDirs"), new Path("/mkdirDirs1"));

fs.close();

}

/**

* 查看目录信息，只显示文件

* @throws IOException

* @throws IllegalArgumentException

* @throws FileNotFoundException

@Test

public void testListFiles() throws FileNotFoundException,

IllegalArgumentException, IOException {

// 思考：为什么返回迭代器，而不是List之类的容器

RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(

new Path("/"), true);

while (listFiles.hasNext()) {

LocatedFileStatus fileStatus = listFiles.next();

System.out.println(fileStatus.getPath().getName());

System.out.println(fileStatus.getBlockSize());

System.out.println(fileStatus.getPermission());

System.out.println(fileStatus.getLen());

BlockLocation[] blockLocations = fileStatus.getBlockLocations();

for (BlockLocation bl : blockLocations) {

System.out.println("block-length:" + bl.getLength() + "--"

+ "block-offset:" + bl.getOffset());

String[] hosts = bl.getHosts();

for (String host : hosts) {

System.out.println(host);

}

/**

* 查看文件及文件夹信息

* @throws IOException

* @throws IllegalArgumentException

* @throws FileNotFoundException

@Test

public void testListAll() throws FileNotFoundException,

IllegalArgumentException, IOException {

FileStatus[] listStatus = fs.listStatus(new Path("/"));

String flag = "d-- ";

for (FileStatus fstatus : listStatus) {

if (fstatus.isFile())

flag = "f-- ";

System.out.println(flag + fstatus.getPath().getName());

}

B）、通过流的方式访问Hdfs

package hdfs;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

//import org.apache.commons.io.IOUtils;

import org.apache.hadoop.io.IOUtils;

import org.junit.Before;

import org.junit.Test;

public class StreamHdfs {

FileSystem fs = null;

@Before

public void init() throws Exception {

// 加入winutils.exe工具

System.setProperty("hadoop.home.dir",

"E:\\winutils-hadoop-2.6.4\\hadoop-2.6.4");

Configuration conf = new Configuration();

fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "root");

}

/**

* 通过流的方式上传文件到hdfs

* @throws Exception

@Test

public void testUpload() throws Exception {

FSDataOutputStream outputStream = fs.create(

new Path("/angelababy.love"), true);

FileInputStream inputStream = new FileInputStream("c:/angelababy.love");

// IOUtils.copy(inputStream, outputStream);

}

/**

* 把HDFS上的文件的信息下载到本地的文件中

* @throws IllegalArgumentException

* @throws IOException

@Test

public void testDownLoadFileToLocal() throws IllegalArgumentException,

IOException {

// 先获取一个文件的输入流----针对hdfs上的

FSDataInputStream in = fs.open(new Path(

"/wordcount/capacity-scheduler.xml"));

// 再构造一个文件的输出流----针对本地的

FileOutputStream out = new FileOutputStream(new File("d:/china.txt"));

// 再将输入流中数据传输到输出流

IOUtils.copyBytes(in, out, 4096);

}

/**

* hdfs支持随机定位进行文件读取，而且可以方便地读取指定长度用于上层分布式运算框架并发处理数据

* @throws IllegalArgumentException

* @throws IOException

@Test

public void testRandomAccess() throws IllegalArgumentException, IOException {

// 先获取一个文件的输入流----针对hdfs上的

FSDataInputStream in = fs.open(new Path(

"/wordcount/capacity-scheduler.xml"));

// 可以将流的起始偏移量进行自定义

in.seek(22);

// 再构造一个文件的输出流----针对本地的

FileOutputStream out = new FileOutputStream(new File("d:/china.txt"));

IOUtils.copyBytes(in, out, 19L, true);

}

/**

* 显示hdfs上文件的内容

* @throws IOException

* @throws IllegalArgumentException

@Test

public void testCat() throws IllegalArgumentException, IOException {

FSDataInputStream in = fs.open(new Path(

"/wordcount/capacity-scheduler.xml"));

IOUtils.copyBytes(in, System.out, 1024);

}

本文参与腾讯云自媒体同步曝光计划，分享自微信公众号。

原始发表：2018-03-23，如有侵权请联系 cloudcommunity@tencent.com 删除

hadoop

大数据

node.js

xml

本文分享自河马coding 微信公众号，前往查看

如有侵权，请联系 cloudcommunity@tencent.com 删除。

本文参与腾讯云自媒体同步曝光计划，欢迎热爱写作的你一起参与！

登录后参与评论

0 条评论

热度