Hadoop Java API-Hadoop实例操作

摘要 Hadoop Java

属于 :API 标签: hadoop 发布于:2020-03-26 09:45:15

启动hadoop(在Windows命令行中执行以下命令)

g:
cd G:\deploy\sbin
start-all

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.demo</groupId>
  <artifactId>hdop</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>hdop</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

	<dependencies>
		<!-- System-scope hack: tools.jar is needed by some Hadoop 2.x artifacts
		     on JDK 8; it resolves from the local ${JAVA_HOME} and is not portable
		     to JDK 9+ (tools.jar was removed). -->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-auth</artifactId>
			<version>2.8.1</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>1.2.17</version>
		</dependency>
		<dependency>
			<groupId>commons-logging</groupId>
			<artifactId>commons-logging</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>com.google.guava</groupId>
			<artifactId>guava</artifactId>
			<version>19.0</version>
		</dependency>
		<dependency>
			<groupId>commons-collections</groupId>
			<artifactId>commons-collections</artifactId>
			<version>3.2.2</version>
		</dependency>
		<dependency>
			<groupId>commons-cli</groupId>
			<artifactId>commons-cli</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>commons-lang</groupId>
			<artifactId>commons-lang</artifactId>
			<version>2.6</version>
		</dependency>
		<dependency>
			<groupId>commons-configuration</groupId>
			<artifactId>commons-configuration</artifactId>
			<version>1.9</version>
		</dependency>
		<dependency>
			<groupId>org.apache.avro</groupId>
			<artifactId>avro</artifactId>
			<version>1.7.7</version>
		</dependency>
		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.5</version>
		</dependency>
		<!-- https://mvnrepository.com/artifact/junit/junit -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<!-- Compiler configuration; version pinned for reproducible builds
			     (Maven warns when a plugin version is unspecified). -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.8.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
					<encoding>UTF-8</encoding>
				</configuration>
			</plugin>
			<!-- Resource copying with an explicit encoding so non-ASCII
			     resources survive filtering. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-resources-plugin</artifactId>
				<version>2.6</version>
				<configuration>
					<encoding>UTF-8</encoding>
				</configuration>
			</plugin>
			<!-- Copy runtime dependencies into target/lib at package time. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-dependency-plugin</artifactId>
				<executions>
					<execution>
						<id>copy-dependencies</id>
						<phase>package</phase>
						<goals>
							<goal>copy-dependencies</goal>
						</goals>
						<configuration>
							<outputDirectory>${project.build.directory}/lib</outputDirectory>
							<overWriteReleases>false</overWriteReleases>
							<overWriteSnapshots>false</overWriteSnapshots>
							<overWriteIfNewer>true</overWriteIfNewer>
						</configuration>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>
</project>


操作类

package com.demo.hdop;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
   *  测试hadoop
 * 
 * @author Administrator
 *
 */
public class HadoopTest {

	public static void main(String[] args) throws Exception {
		HadoopTest t = new HadoopTest();
		t.listFiles();
	}

	/**
	 * 查询所有的文件夹及文件
	 * 
	 * @throws IOException
	 */
	@org.junit.Test
	public void listFiles() throws IOException {
		Configuration conf = new Configuration();
		conf.set("fs.default.name", "hdfs://0.0.0.0:19000");
		FileSystem fs = FileSystem.newInstance(conf);
		// true 表示递归查找 false 不进行递归查找
		RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path("/"), true);
		while (iterator.hasNext()) {
			LocatedFileStatus next = iterator.next();
			System.out.println(next.getPath());
		}
		System.out.println("----------------------------------------------------------");
		FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
		for (int i = 0; i < fileStatuses.length; i++) {
			FileStatus fileStatus = fileStatuses[i];
			System.out.println(fileStatus.getPath());
		}
	}

	/**
	 * 上传文件到hdfs上
	 */
	@org.junit.Test
	public void upload() throws IOException {
		long currentTimeMillis = System.currentTimeMillis();
		Configuration conf = new Configuration();
//		conf.set("fs.defaultFS", "hdfs://0.0.0.0:19000");
		conf.set("fs.default.name", "hdfs://0.0.0.0:19000");
		FileSystem fs = FileSystem.get(conf);
		fs.copyFromLocalFile(new Path("D:\\mysql-connector-java-5.1.18.jar"), new Path("/test1")); // 第一个为本地文件路径,第二个为hadoop路径
		long c = System.currentTimeMillis() - currentTimeMillis;
		System.out.println(c);
	}

	/**
	 * 将hdfs上文件下载到本地
	 */
	@org.junit.Test
	public void download() throws IOException {
		long currentTimeMillis = System.currentTimeMillis();
		Configuration conf = new Configuration();
//		conf.set("fs.defaultFS", "hdfs://0.0.0.0:19000");
		conf.set("fs.default.name", "hdfs://0.0.0.0:19000");
		System.setProperty("hadoop.home.dir", "g:\\deploy/");//设定你的hadoop的目录
		FileSystem fs = FileSystem.newInstance(conf);
		fs.copyToLocalFile(new Path("/testFile2"), new Path("g:\\")); //
		System.out.println("成功");
		long c = System.currentTimeMillis() - currentTimeMillis;
		System.out.println(c);

	}

	/**
	 * 在hdfs更目录下面创建test1文件夹
	 * 
	 * @throws IOException
	 */
	@org.junit.Test
	public void mkdir() throws IOException {
		Configuration conf = new Configuration();
		//conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		conf.set("fs.default.name", "hdfs://0.0.0.0:19000");
		
		FileSystem fs = FileSystem.newInstance(conf);
		fs.mkdirs(new Path("/testDir"));
		System.out.println("成功");
	}
	
	@org.junit.Test
	public void newFile() throws IOException {
		Configuration conf = new Configuration();
		//conf.set("fs.defaultFS", "hdfs://192.168.10.110:9001");
		conf.set("fs.default.name", "hdfs://0.0.0.0:19000");
		
		FileSystem fs = FileSystem.newInstance(conf);
		FSDataOutputStream out = fs.create(new Path("/testFile2"));
		String sayHi = "你好啊,这是新创建的一个文件。Hello, this is a new file.";
		byte[] buff = sayHi.getBytes();
		out.write(buff, 0, buff.length);
		out.close();
		System.out.println("成功");
	}

}

Run as JUnit