Java Code Examples for org.apache.hadoop.fs.FileSystem#create()
The following examples show how to use org.apache.hadoop.fs.FileSystem#create(). Each example notes the project, source file, and license it was taken from, so you can check out the original source for the full context and related API usage.
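Before the project-specific snippets, here is a minimal sketch of the basic pattern they all share: obtain a FileSystem, call create() to get an FSDataOutputStream, write, and close. This sketch is not taken from any of the projects below; the path and payload are made up for illustration.

    // Minimal sketch (illustrative only): create a file on the default FileSystem,
    // write a few bytes, and close the stream. The target path is hypothetical.
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);              // default filesystem from the configuration
        Path path = new Path("/tmp/create-example.txt");   // hypothetical target path
        // create(Path, boolean overwrite) returns an FSDataOutputStream
        try (FSDataOutputStream out = fs.create(path, true)) {
          out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
      }
    }

The create(Path, boolean) overload used here simply overwrites any existing file; the examples below show richer overloads that also set buffer size, replication, block size, permissions, and checksum options.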
Example 1
Source File: TestIFile.java From hadoop with Apache License 2.0

@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
  reader.close();

  // test check sum
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
}
Example 2
Source File: TajoMaster.java From tajo with Apache License 2.0

private void writeSystemConf() throws IOException {
  // Storing the system configs
  Path systemConfPath = TajoConf.getSystemConfPath(systemConf);
  if (!defaultFS.exists(systemConfPath.getParent())) {
    defaultFS.mkdirs(systemConfPath.getParent());
  }

  if (defaultFS.exists(systemConfPath)) {
    defaultFS.delete(systemConfPath, false);
  }

  // In TajoMaster HA, some masters might see a LeaseExpiredException because of a lease mismatch.
  // Thus, we need to create the xml file below at HdfsServiceTracker::writeSystemConf.
  if (!systemConf.getBoolVar(TajoConf.ConfVars.TAJO_MASTER_HA_ENABLE)) {
    try (FSDataOutputStream out = FileSystem.create(defaultFS, systemConfPath,
        new FsPermission(SYSTEM_CONF_FILE_PERMISSION))) {
      systemConf.writeXml(out);
    }
    defaultFS.setReplication(systemConfPath,
        (short) systemConf.getIntVar(ConfVars.SYSTEM_CONF_REPLICA_COUNT));
  }
}
Example 3
Source File: DistributedPentomino.java From hadoop-gpu with Apache License 2.0

/**
 * Create the input file with all of the possible combinations of the
 * given depth.
 * @param fs the filesystem to write into
 * @param dir the directory to write the input file into
 * @param pent the puzzle
 * @param depth the depth to explore when generating prefixes
 */
private static void createInputDirectory(FileSystem fs, Path dir,
                                         Pentomino pent, int depth) throws IOException {
  fs.mkdirs(dir);
  List<int[]> splits = pent.getSplits(depth);
  PrintStream file = new PrintStream(
      new BufferedOutputStream(fs.create(new Path(dir, "part1")), 64 * 1024));
  for (int[] prefix : splits) {
    for (int i = 0; i < prefix.length; ++i) {
      if (i != 0) {
        file.print(',');
      }
      file.print(prefix[i]);
    }
    file.print('\n');
  }
  file.close();
}
Example 4
Source File: TestSetTimes.java From hadoop-gpu with Apache License 2.0

private FSDataOutputStream writeFile(FileSystem fileSys, Path name, int repl)
    throws IOException {
  FSDataOutputStream stm = fileSys.create(name, true,
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      (short) repl, (long) blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  return stm;
}
Example 5
Source File: TestTrash.java From hadoop-gpu with Apache License 2.0

protected static Path writeFile(FileSystem fs, Path f) throws IOException {
  DataOutputStream out = fs.create(f);
  out.writeBytes("dhruba: " + f);
  out.close();
  assertTrue(fs.exists(f));
  return f;
}
Example 6
Source File: UtilsForTests.java From big-c with Apache License 2.0

/**
 * Creates a file in the dfs.
 * @param dfs FileSystem the file system in which the file is created
 * @param URIPATH Path the dfs path where the file is created
 * @param permission FsPermission the file permission
 * @param input the content to write into the file
 * @return the DataOutputStream used to write the file
 */
public static DataOutputStream createTmpFileDFS(FileSystem dfs, Path URIPATH,
    FsPermission permission, String input) throws Exception {
  // Creating the path with the file
  DataOutputStream file = FileSystem.create(dfs, URIPATH, permission);
  file.writeBytes(input);
  file.close();
  return file;
}
Example 7
Source File: PersistedHDFSManager.java From Knowage-Server with GNU Affero General Public License v3.0

public FSDataOutputStream openHdfsFile(String fileName, String folderName) {
  logger.debug("Begin file opening");
  FSDataOutputStream fsOS = null;
  Path filePath = null;
  try {
    FileSystem fs = hdfs.getFs();
    filePath = fs.getWorkingDirectory();
    if (folderName != null && folderName.length() > 0) {
      filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR, folderName));
      if (!fs.exists(filePath) || !fs.isDirectory(filePath)) {
        fs.mkdirs(filePath);
      }
    }
    filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR + fileName));
    boolean existsFile = fs.exists(filePath);
    if (existsFile) {
      logger.debug("File is already present in folder, it will be deleted and replaced with new file");
      fs.delete(filePath, true);
    }
    fsOS = fs.create(filePath, true);
  } catch (IOException e) {
    logger.error("Impossible to open file in File System");
    throw new SpagoBIRuntimeException("Impossible to open file in File System" + e);
  }
  logger.debug("File opened");
  return fsOS;
}
Example 8
Source File: CopyFromS3.java From emr-sample-apps with Apache License 2.0

/**
 * This method constructs the JobConf to be used to run the map reduce job to
 * download the files from S3. This is a potentially expensive method since it
 * makes multiple calls to S3 to get a listing of all the input data. Clients
 * are encouraged to cache the returned JobConf reference and not call this
 * method multiple times unless necessary.
 *
 * @return the JobConf to be used to run the map reduce job to download the
 *         files from S3.
 */
public JobConf getJobConf() throws IOException, ParseException {
  JobConf conf = new JobConf(CopyFromS3.class);
  conf.setJobName("CopyFromS3");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(S3CopyMapper.class);
  // We configure a reducer, even though we don't use it right now.
  // The idea is that, in the future we may.
  conf.setReducerClass(HDFSWriterReducer.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, new Path(tempFile));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setCompressMapOutput(true);

  JobClient jobClient = new JobClient(conf);

  FileSystem inputFS = FileSystem.get(URI.create(inputPathPrefix), conf);
  DatePathFilter datePathFilter = new DatePathFilter(startDate, endDate);
  List<Path> filePaths = getFilePaths(inputFS, new Path(inputPathPrefix),
      datePathFilter, jobClient.getDefaultMaps());

  // Write the file names to a temporary index file to be used
  // as input to the map tasks.
  FileSystem outputFS = FileSystem.get(URI.create(tempFile), conf);
  FSDataOutputStream outputStream = outputFS.create(new Path(tempFile), true);
  try {
    for (Path path : filePaths) {
      outputStream.writeBytes(path.toString() + "\n");
    }
  } finally {
    outputStream.close();
  }

  conf.setNumMapTasks(Math.min(filePaths.size(), jobClient.getDefaultMaps()));

  return conf;
}
Example 9
Source File: TestLineRecordReaderJobs.java From big-c with Apache License 2.0

/**
 * Writes the input test file
 *
 * @param conf
 * @throws IOException
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
Example 10
Source File: TestUtils.java From succinct with Apache License 2.0

public static FSDataInputStream getStream(ShortBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeShort(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
Example 11
Source File: TestSeekBug.java From RDFS with Apache License 2.0

private void writeFile(FileSystem fileSys, Path name) throws IOException {
  // create and write a file that contains 1MB
  DataOutputStream stm = fileSys.create(name);
  byte[] buffer = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
Example 12
Source File: Distcp.java From aegisthus with Apache License 2.0

protected void writeManifest(Job job, List<FileStatus> files) throws IOException {
  Path out = new Path(job.getConfiguration().get(OPT_DISTCP_TARGET));
  FileSystem fsOut = out.getFileSystem(job.getConfiguration());
  DataOutputStream dos = fsOut.create(new Path(out, "_manifest/.manifest"));
  for (FileStatus file : files) {
    Path output = new Path(out, file.getPath().getName());
    dos.writeBytes(output.toUri().toString());
    dos.write('\n');
  }
  dos.close();
}
Example 13
Source File: AvroHdfsFileSink.java From components with Apache License 2.0

@Override
protected void mergeOutput(FileSystem fs, String sourceFolder, String targetFile) throws IOException {
  try (DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
    FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
    Schema schema = null;
    String inputCodec = null;
    OutputStream output = new BufferedOutputStream(fs.create(new Path(targetFile)));
    for (FileStatus sourceStatus : sourceStatuses) {
      try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
          new BufferedInputStream(fs.open(sourceStatus.getPath())),
          new GenericDatumReader<GenericRecord>())) {

        if (schema == null) {
          schema = reader.getSchema();
          for (String key : reader.getMetaKeys()) {
            if (!DataFileWriter.isReservedMeta(key)) {
              writer.setMeta(key, reader.getMeta(key));
            }
          }
          inputCodec = reader.getMetaString(DataFileConstants.CODEC);
          if (inputCodec == null) {
            inputCodec = DataFileConstants.NULL_CODEC;
          }
          writer.setCodec(CodecFactory.fromString(inputCodec));
          writer.create(schema, output);
        }
        writer.appendAllFrom(reader, false);
      }
    }
  }
}
Example 14
Source File: JobControlTestUtils.java From RDFS with Apache License 2.0

/**
 * Generates data that can be used for Job Control tests.
 *
 * @param fs FileSystem to create data in.
 * @param dirPath Path to create the data in.
 * @throws IOException If an error occurs creating the data.
 */
static void generateData(FileSystem fs, Path dirPath) throws IOException {
  FSDataOutputStream out = fs.create(new Path(dirPath, "data.txt"));
  for (int i = 0; i < 10000; i++) {
    String line = generateRandomLine();
    out.write(line.getBytes("UTF-8"));
  }
  out.close();
}
Example 15
Source File: DataUtils.java From sparkboost with Apache License 2.0

/**
 * Generate a new LibSvm output file giving each document an index corresponding to the index the
 * documents had in the original input LibSvm file.
 *
 * @param sc The spark context.
 * @param dataFile The data file.
 * @param outputFile The output file.
 */
public static void generateLibSvmFileWithIDs(JavaSparkContext sc, String dataFile, String outputFile) {
  if (sc == null)
    throw new NullPointerException("The Spark Context is 'null'");
  if (dataFile == null || dataFile.isEmpty())
    throw new IllegalArgumentException("The dataFile is 'null'");
  ArrayList<MultilabelPoint> points = new ArrayList<>();
  try {
    Path pt = new Path(dataFile);
    FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
    Path ptOut = new Path(outputFile);
    BufferedWriter bw = new BufferedWriter((new OutputStreamWriter(fs.create(ptOut))));
    try {
      int docID = 0;
      String line = br.readLine();
      while (line != null) {
        bw.write("" + docID + "\t" + line + "\n");
        line = br.readLine();
        docID++;
      }
    } finally {
      br.close();
      bw.close();
    }
  } catch (Exception e) {
    throw new RuntimeException("Reading input LibSVM data file", e);
  }
}
Example 16
Source File: TestRaidDfs.java From RDFS with Apache License 2.0

public static long createTestFile(FileSystem fileSys, Path name, int repl,
                                  long fileSize, long blockSize, int seed)
    throws IOException {
  CRC32 crc = new CRC32();
  Random rand = new Random(seed);
  FSDataOutputStream stm = fileSys.create(name, true,
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      (short) repl, blockSize);
  LOG.info("create file " + name + " size: " + fileSize +
      " blockSize: " + blockSize + " repl: " + repl);
  // fill random data into file
  byte[] b = new byte[(int) blockSize];
  long numBlocks = fileSize / blockSize;
  for (int i = 0; i < numBlocks; i++) {
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  long lastBlock = fileSize - numBlocks * blockSize;
  if (lastBlock > 0) {
    b = new byte[(int) lastBlock];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  stm.close();
  return crc.getValue();
}
Example 17
Source File: RegexBulkLoadToolIT.java From phoenix with Apache License 2.0

@Ignore
@Test
public void testImportWithIndex() throws Exception {
  Statement stmt = conn.createStatement();
  stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, "
      + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
  String ddl = "CREATE INDEX TABLE3_IDX ON TABLE3 "
      + " (FIRST_NAME ASC)"
      + " INCLUDE (LAST_NAME)";
  stmt.execute(ddl);

  FileSystem fs = FileSystem.get(getUtility().getConfiguration());
  FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
  PrintWriter printWriter = new PrintWriter(outputStream);
  printWriter.println("1,FirstName 1,LastName 1");
  printWriter.println("2,FirstName 2,LastName 2");
  printWriter.close();

  RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
  regexBulkLoadTool.setConf(getUtility().getConfiguration());
  int exitCode = regexBulkLoadTool.run(new String[] {
      "--input", "/tmp/input3.csv",
      "--table", "table3",
      "--regex", "([^,]*),([^,]*),([^,]*)",
      "--zookeeper", zkQuorum});
  assertEquals(0, exitCode);

  ResultSet rs = stmt.executeQuery(
      "SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
  assertTrue(rs.next());
  assertEquals(2, rs.getInt(1));
  assertEquals("FirstName 2", rs.getString(2));

  rs.close();
  stmt.close();
}
Example 18
Source File: TestBlockTokenWithDFS.java From hadoop with Apache License 2.0

private void createFile(FileSystem fs, Path filename) throws IOException {
  FSDataOutputStream out = fs.create(filename);
  out.write(rawData);
  out.close();
}
Example 19
Source File: TestDistributedFileSystem.java From big-c with Apache License 2.0

@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args
  Path path1 = new Path(testBasePath, "file_wtih_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096,
        repl, 131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096,
        repl, 131072L, null, opt2);

    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
  } finally {
    if (cluster != null) {
      cluster.getFileSystem().delete(testBasePath, true);
      cluster.shutdown();
    }
  }
}
Example 20
Source File: TestFileAppend4.java From RDFS with Apache License 2.0

void replicationTest(int badDN) throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int) BLOCK_SIZE / 2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE % 4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int) BLOCK_SIZE * 2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int) BLOCK_SIZE / 4);
    stm.sync();
    assertNumCurrentReplicas((short) (rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shut down
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");
    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();
    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for fs.replication.min replicas, and
    // it may take some millis before the other DN reports the block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE * 3 / 4);
    checkFile(fs1, BLOCK_SIZE * 3 / 4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}