signed

QiShunwang

“诚信为本、客户至上”

HDFS 编程不可怕,一篇文章搞定它

2021/6/8 23:40:02   来源:

前言

本文隶属于专栏《1000个问题搞定大数据技术体系》,该专栏为笔者原创,引用请注明来源,不足和错误之处请在评论区帮忙指出,谢谢!

本专栏的目录结构和参考文献请见《1000个问题搞定大数据技术体系》。

正文

从 HDFS 文件系统里面读取一个文件到本地

package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.net.URI;

/**
 * Example: stream a single file out of HDFS into a file on the local disk.
 */
public class HDFSReader {

    /** Size in bytes of the buffer handed to {@code IOUtils.copyBytes}. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Opens the HDFS source file and copies its bytes into the local target file.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or the copy fails
     */
    public static void main(String[] args) throws Exception {
        String srcFile = "hdfs://node-01:9000/data/hdfs01.mp4";
        Configuration conf = new Configuration();
        // All three resources are owned by try-with-resources and are closed in
        // reverse order (local output, HDFS input, then the file system), so the
        // FileSystem handle is released even when the copy fails.
        try (FileSystem fs = FileSystem.get(URI.create(srcFile), conf);
             FSDataInputStream hdfsInStream = fs.open(new Path(srcFile));
             BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream("/home/shockang/hdfs02.mp4"))) {
            // close=false: the streams are closed exactly once, by the try block.
            IOUtils.copyBytes(hdfsInStream, outputStream, BUFFER_SIZE, false);
        }
    }
}
package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: download from HDFS to the local file system with
 * {@code FileSystem.copyToLocalFile}.
 */
public class CopyToLocalFile {

    /**
     * Copies the configured HDFS source path to the local destination path and
     * prints a confirmation once the copy has returned.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or the copy fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");
        // try-with-resources guarantees the FileSystem is closed even if the
        // copy throws; the original only closed it on the success path.
        try (FileSystem fs = FileSystem.get(conf)) {
            Path src = new Path("hdfs:/");
            Path dst = new Path("/home/shockang/file1");
            fs.copyToLocalFile(src, dst);
            System.out.println("download success");
        }
    }
}

将本地的文件上传到 HDFS

package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

/**
 * Example: stream a local file into a newly created HDFS file.
 */
public class HDFSWriter {

    /** Size in bytes of the buffer handed to {@code IOUtils.copyBytes}. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Reads the local source file and writes its bytes to the HDFS destination.
     *
     * @param args unused
     * @throws Exception if the local file cannot be opened, the file system
     *                   cannot be reached, or the copy fails
     */
    public static void main(String[] args) throws Exception {
        String src = "/home/shockang/hdfs01.mp4";
        String dst = "hdfs://node-01:9000/data/hdfs01.mp4";
        Configuration conf = new Configuration();
        // The local input is opened first so that a missing source file fails
        // before fs.create() is ever called on the destination. Resources close
        // in reverse order: the HDFS output stream, then the file system, then
        // the local input stream.
        try (InputStream in = new BufferedInputStream(new FileInputStream(src));
             FileSystem fs = FileSystem.get(URI.create(dst), conf);
             OutputStream out = fs.create(new Path(dst))) {
            // close=false: the streams are closed exactly once, by the try block.
            IOUtils.copyBytes(in, out, BUFFER_SIZE, false);
        }
    }
}
package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: upload from the local file system to HDFS with
 * {@code FileSystem.copyFromLocalFile}.
 */
public class CopyFromLocalFile {

    /**
     * Copies the local source path to the configured HDFS destination path and
     * prints a confirmation once the copy has returned.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or the copy fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");
        // try-with-resources guarantees the FileSystem is closed even if the
        // copy throws; the original only closed it on the success path.
        try (FileSystem fs = FileSystem.get(conf)) {
            Path src = new Path("/home/shockang/file1");
            Path dst = new Path("hdfs:/");
            fs.copyFromLocalFile(src, dst);
            System.out.println("upload success");
        }
    }
}

创建 HDFS 目录

package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: create a directory in HDFS with {@code FileSystem.mkdirs}.
 */
public class CreateDir {

    /**
     * Creates the directory {@code hdfs:/mydir} and prints the boolean returned
     * by {@code mkdirs}.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or the call fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");
        // try-with-resources guarantees the FileSystem is closed even if
        // mkdirs throws; the original only closed it on the success path.
        try (FileSystem fs = FileSystem.get(conf)) {
            boolean ok = fs.mkdirs(new Path("hdfs:/mydir"));
            System.out.println("mkdirs result:" + ok);
        }
    }
}

删除 HDFS 文件

package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: delete a single file from HDFS.
 */
public class DeleteFile {

    /**
     * Deletes {@code hdfs:/newFile.txt} and prints the boolean returned by
     * {@code FileSystem.delete}.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or the call fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");
        // try-with-resources guarantees the FileSystem is closed even if the
        // delete throws; the original only closed it on the success path.
        try (FileSystem fs = FileSystem.get(conf)) {
            // delete() removes the path immediately and reports the outcome.
            // The original used deleteOnExit(), which only registers the path
            // to be removed when the file system is closed, so the value it
            // printed was not a deletion result.
            // recursive=false: the target is a single file, not a directory tree.
            boolean ok = fs.delete(new Path("hdfs:/newFile.txt"), false);
            System.out.println("delete result:" + ok);
        }
    }
}

查看 HDFS 文件目录树结构

package com.shockang.study.bigdata.hdfs.operation;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: print the HDFS directory tree, one path per line, starting at the root.
 */
public class ListStatus {

    /**
     * Lists the entries under {@code hdfs:/} and prints each one, descending
     * into directories.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or a listing fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");
        // try-with-resources guarantees the FileSystem is closed even if a
        // listing throws; the original only closed it on the success path.
        try (FileSystem fs = FileSystem.get(conf)) {
            // A for-each over an empty array does nothing, so no length guard
            // is needed.
            for (FileStatus entry : fs.listStatus(new Path("hdfs:/"))) {
                show(fs, entry);
            }
        }
    }

    /**
     * Prints the path of {@code file}; when it is a directory, lists its
     * children and prints each of them recursively (depth-first).
     *
     * @param fs   file system used to list directory contents
     * @param file entry to print
     * @throws Exception if listing a directory fails
     */
    private static void show(FileSystem fs, FileStatus file) throws Exception {
        Path path = file.getPath();
        System.out.println(path);
        if (!file.isDirectory()) {
            return;
        }
        for (FileStatus child : fs.listStatus(path)) {
            show(fs, child);
        }
    }
}