Java Code Examples for org.apache.hadoop.fs.FileSystem#create()
The following examples show how to use org.apache.hadoop.fs.FileSystem#create(). Each example notes the project, source file, and license it was taken from, so you can check out the original source for the full context and related API usage.
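Before the project-specific snippets, here is a minimal sketch of the basic pattern they all share: obtain a FileSystem, call create() to get an FSDataOutputStream, write, and close. This sketch is not taken from any of the projects below; the path and payload are made up for illustration.

    // Minimal sketch (illustrative only): create a file on the default FileSystem,
    // write a few bytes, and close the stream. The target path is hypothetical.
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);              // default filesystem from the configuration
        Path path = new Path("/tmp/create-example.txt");   // hypothetical target path
        // create(Path, boolean overwrite) returns an FSDataOutputStream
        try (FSDataOutputStream out = fs.create(path, true)) {
          out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
      }
    }

The create(Path, boolean) overload used here simply overwrites any existing file; the examples below show richer overloads that also set buffer size, replication, block size, permissions, and checksum options.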
Example 1
Source File: TestIFile.java From hadoop with Apache License 2.0

@Test
/** Same as above but create a reader. */
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class, codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, in, rfs.getFileStatus(path).getLen(), codec, null);
  reader.close();

  // test check sum
  byte[] ab = new byte[100];
  int readed = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(readed, reader.checksumIn.getChecksum().length);
}
Example 2
Source File: TajoMaster.java From tajo with Apache License 2.0

private void writeSystemConf() throws IOException {
  // Storing the system configs
  Path systemConfPath = TajoConf.getSystemConfPath(systemConf);
  if (!defaultFS.exists(systemConfPath.getParent())) {
    defaultFS.mkdirs(systemConfPath.getParent());
  }

  if (defaultFS.exists(systemConfPath)) {
    defaultFS.delete(systemConfPath, false);
  }

  // In TajoMaster HA, some masters might see a LeaseExpiredException because of a lease mismatch.
  // Thus, we need to create the xml file below at HdfsServiceTracker::writeSystemConf.
  if (!systemConf.getBoolVar(TajoConf.ConfVars.TAJO_MASTER_HA_ENABLE)) {
    try (FSDataOutputStream out = FileSystem.create(defaultFS, systemConfPath,
        new FsPermission(SYSTEM_CONF_FILE_PERMISSION))) {
      systemConf.writeXml(out);
    }
    defaultFS.setReplication(systemConfPath,
        (short) systemConf.getIntVar(ConfVars.SYSTEM_CONF_REPLICA_COUNT));
  }
}
Example 3
Source File: DistributedPentomino.java From hadoop-gpu with Apache License 2.0

/**
 * Create the input file with all of the possible combinations of the
 * given depth.
 * @param fs the filesystem to write into
 * @param dir the directory to write the input file into
 * @param pent the puzzle
 * @param depth the depth to explore when generating prefixes
 */
private static void createInputDirectory(FileSystem fs, Path dir,
                                         Pentomino pent, int depth) throws IOException {
  fs.mkdirs(dir);
  List<int[]> splits = pent.getSplits(depth);
  PrintStream file = new PrintStream(
      new BufferedOutputStream(fs.create(new Path(dir, "part1")), 64 * 1024));
  for (int[] prefix : splits) {
    for (int i = 0; i < prefix.length; ++i) {
      if (i != 0) {
        file.print(',');
      }
      file.print(prefix[i]);
    }
    file.print('\n');
  }
  file.close();
}
Example 4
Source File: TestSetTimes.java From hadoop-gpu with Apache License 2.0

private FSDataOutputStream writeFile(FileSystem fileSys, Path name, int repl)
    throws IOException {
  FSDataOutputStream stm = fileSys.create(name, true,
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      (short) repl, (long) blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  return stm;
}
Example 5
Source File: TestTrash.java From hadoop-gpu with Apache License 2.0

protected static Path writeFile(FileSystem fs, Path f) throws IOException {
  DataOutputStream out = fs.create(f);
  out.writeBytes("dhruba: " + f);
  out.close();
  assertTrue(fs.exists(f));
  return f;
}
Example 6
Source File: UtilsForTests.java From big-c with Apache License 2.0

/**
 * Creates a file in the dfs.
 * @param dfs FileSystem the file system in which the file is created
 * @param URIPATH Path the dfs path where the file is created
 * @param permission FsPermission the file permission
 * @param input the content to write into the file
 * @return the DataOutputStream used to write the file
 */
public static DataOutputStream createTmpFileDFS(FileSystem dfs, Path URIPATH,
    FsPermission permission, String input) throws Exception {
  // Creating the path with the file
  DataOutputStream file = FileSystem.create(dfs, URIPATH, permission);
  file.writeBytes(input);
  file.close();
  return file;
}
Example 7
Source File: PersistedHDFSManager.java From Knowage-Server with GNU Affero General Public License v3.0

public FSDataOutputStream openHdfsFile(String fileName, String folderName) {
  logger.debug("Begin file opening");
  FSDataOutputStream fsOS = null;
  Path filePath = null;
  try {
    FileSystem fs = hdfs.getFs();
    filePath = fs.getWorkingDirectory();
    if (folderName != null && folderName.length() > 0) {
      filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR, folderName));
      if (!fs.exists(filePath) || !fs.isDirectory(filePath)) {
        fs.mkdirs(filePath);
      }
    }
    filePath = Path.mergePaths(filePath, new Path(Path.SEPARATOR + fileName));
    boolean existsFile = fs.exists(filePath);
    if (existsFile) {
      logger.debug("File is already present in folder, it will be deleted and replaced with new file");
      fs.delete(filePath, true);
    }
    fsOS = fs.create(filePath, true);
  } catch (IOException e) {
    logger.error("Impossible to open file in File System");
    throw new SpagoBIRuntimeException("Impossible to open file in File System" + e);
  }
  logger.debug("File opened");
  return fsOS;
}
Example 8
Source File: CopyFromS3.java From emr-sample-apps with Apache License 2.0

/**
 * This method constructs the JobConf to be used to run the map reduce job to
 * download the files from S3. This is a potentially expensive method since it
 * makes multiple calls to S3 to get a listing of all the input data. Clients
 * are encouraged to cache the returned JobConf reference and not call this
 * method multiple times unless necessary.
 *
 * @return the JobConf to be used to run the map reduce job to download the
 *         files from S3.
 */
public JobConf getJobConf() throws IOException, ParseException {
  JobConf conf = new JobConf(CopyFromS3.class);
  conf.setJobName("CopyFromS3");
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(S3CopyMapper.class);
  // We configure a reducer, even though we don't use it right now.
  // The idea is that, in the future we may.
  conf.setReducerClass(HDFSWriterReducer.class);
  conf.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(conf, new Path(tempFile));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setCompressMapOutput(true);

  JobClient jobClient = new JobClient(conf);

  FileSystem inputFS = FileSystem.get(URI.create(inputPathPrefix), conf);
  DatePathFilter datePathFilter = new DatePathFilter(startDate, endDate);
  List<Path> filePaths = getFilePaths(inputFS, new Path(inputPathPrefix),
      datePathFilter, jobClient.getDefaultMaps());

  // Write the file names to a temporary index file to be used
  // as input to the map tasks.
  FileSystem outputFS = FileSystem.get(URI.create(tempFile), conf);
  FSDataOutputStream outputStream = outputFS.create(new Path(tempFile), true);
  try {
    for (Path path : filePaths) {
      outputStream.writeBytes(path.toString() + "\n");
    }
  } finally {
    outputStream.close();
  }

  conf.setNumMapTasks(Math.min(filePaths.size(), jobClient.getDefaultMaps()));

  return conf;
}
Example 9
Source File: TestLineRecordReaderJobs.java From big-c with Apache License 2.0

/**
 * Writes the input test file
 *
 * @param conf
 * @throws IOException
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  Writer writer = new OutputStreamWriter(localFs.create(file));
  writer.write("abc\ndef\t\nghi\njkl");
  writer.close();
}
Example 10
Source File: TestUtils.java From succinct with Apache License 2.0

public static FSDataInputStream getStream(ShortBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeShort(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
Example 11
Source File: TestSeekBug.java From RDFS with Apache License 2.0

private void writeFile(FileSystem fileSys, Path name) throws IOException {
  // create and write a file that contains 1MB
  DataOutputStream stm = fileSys.create(name);
  byte[] buffer = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
Example 12
Source File: Distcp.java From aegisthus with Apache License 2.0

protected void writeManifest(Job job, List<FileStatus> files) throws IOException {
  Path out = new Path(job.getConfiguration().get(OPT_DISTCP_TARGET));
  FileSystem fsOut = out.getFileSystem(job.getConfiguration());
  DataOutputStream dos = fsOut.create(new Path(out, "_manifest/.manifest"));
  for (FileStatus file : files) {
    Path output = new Path(out, file.getPath().getName());
    dos.writeBytes(output.toUri().toString());
    dos.write('\n');
  }
  dos.close();
}
Example 13
Source File: AvroHdfsFileSink.java From components with Apache License 2.0

@Override
protected void mergeOutput(FileSystem fs, String sourceFolder, String targetFile) throws IOException {
  try (DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
    FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
    Schema schema = null;
    String inputCodec = null;
    OutputStream output = new BufferedOutputStream(fs.create(new Path(targetFile)));
    for (FileStatus sourceStatus : sourceStatuses) {
      try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
          new BufferedInputStream(fs.open(sourceStatus.getPath())),
          new GenericDatumReader<GenericRecord>())) {

        if (schema == null) {
          schema = reader.getSchema();
          for (String key : reader.getMetaKeys()) {
            if (!DataFileWriter.isReservedMeta(key)) {
              writer.setMeta(key, reader.getMeta(key));
            }
          }
          inputCodec = reader.getMetaString(DataFileConstants.CODEC);
          if (inputCodec == null) {
            inputCodec = DataFileConstants.NULL_CODEC;
          }
          writer.setCodec(CodecFactory.fromString(inputCodec));
          writer.create(schema, output);
        }
        writer.appendAllFrom(reader, false);
      }
    }
  }
}
Example 14
Source File: JobControlTestUtils.java From RDFS with Apache License 2.0

/**
 * Generates data that can be used for Job Control tests.
 *
 * @param fs FileSystem to create data in.
 * @param dirPath Path to create the data in.
 * @throws IOException If an error occurs creating the data.
 */
static void generateData(FileSystem fs, Path dirPath) throws IOException {
  FSDataOutputStream out = fs.create(new Path(dirPath, "data.txt"));
  for (int i = 0; i < 10000; i++) {
    String line = generateRandomLine();
    out.write(line.getBytes("UTF-8"));
  }
  out.close();
}
Example 15
Source File: DataUtils.java From sparkboost with Apache License 2.0

/**
 * Generate a new LibSvm output file giving each document an index corresponding to the index the
 * documents had in the original input LibSvm file.
 *
 * @param sc The spark context.
 * @param dataFile The data file.
 * @param outputFile The output file.
 */
public static void generateLibSvmFileWithIDs(JavaSparkContext sc, String dataFile, String outputFile) {
  if (sc == null)
    throw new NullPointerException("The Spark Context is 'null'");
  if (dataFile == null || dataFile.isEmpty())
    throw new IllegalArgumentException("The dataFile is 'null'");
  ArrayList<MultilabelPoint> points = new ArrayList<>();
  try {
    Path pt = new Path(dataFile);
    FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
    Path ptOut = new Path(outputFile);
    BufferedWriter bw = new BufferedWriter((new OutputStreamWriter(fs.create(ptOut))));
    try {
      int docID = 0;
      String line = br.readLine();
      while (line != null) {
        bw.write("" + docID + "\t" + line + "\n");
        line = br.readLine();
        docID++;
      }
    } finally {
      br.close();
      bw.close();
    }
  } catch (Exception e) {
    throw new RuntimeException("Reading input LibSVM data file", e);
  }
}
Example 16
Source File: TestRaidDfs.java From RDFS with Apache License 2.0

public static long createTestFile(FileSystem fileSys, Path name, int repl,
                                  long fileSize, long blockSize, int seed)
    throws IOException {
  CRC32 crc = new CRC32();
  Random rand = new Random(seed);
  FSDataOutputStream stm = fileSys.create(name, true,
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      (short) repl, blockSize);
  LOG.info("create file " + name + " size: " + fileSize +
      " blockSize: " + blockSize + " repl: " + repl);
  // fill random data into file
  byte[] b = new byte[(int) blockSize];
  long numBlocks = fileSize / blockSize;
  for (int i = 0; i < numBlocks; i++) {
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  long lastBlock = fileSize - numBlocks * blockSize;
  if (lastBlock > 0) {
    b = new byte[(int) lastBlock];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
  }
  stm.close();
  return crc.getValue();
}
Example 17
Source File: RegexBulkLoadToolIT.java From phoenix with Apache License 2.0

@Ignore
@Test
public void testImportWithIndex() throws Exception {
  Statement stmt = conn.createStatement();
  stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, "
      + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
  String ddl = "CREATE INDEX TABLE3_IDX ON TABLE3 "
      + " (FIRST_NAME ASC)"
      + " INCLUDE (LAST_NAME)";
  stmt.execute(ddl);

  FileSystem fs = FileSystem.get(getUtility().getConfiguration());
  FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
  PrintWriter printWriter = new PrintWriter(outputStream);
  printWriter.println("1,FirstName 1,LastName 1");
  printWriter.println("2,FirstName 2,LastName 2");
  printWriter.close();

  RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
  regexBulkLoadTool.setConf(getUtility().getConfiguration());
  int exitCode = regexBulkLoadTool.run(new String[] {
      "--input", "/tmp/input3.csv",
      "--table", "table3",
      "--regex", "([^,]*),([^,]*),([^,]*)",
      "--zookeeper", zkQuorum});
  assertEquals(0, exitCode);

  ResultSet rs = stmt.executeQuery(
      "SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
  assertTrue(rs.next());
  assertEquals(2, rs.getInt(1));
  assertEquals("FirstName 2", rs.getString(2));

  rs.close();
  stmt.close();
}
Example 18
Source File: TestBlockTokenWithDFS.java From hadoop with Apache License 2.0

private void createFile(FileSystem fs, Path filename) throws IOException {
  FSDataOutputStream out = fs.create(filename);
  out.write(rawData);
  out.close();
}
Example 19
Source File: TestDistributedFileSystem.java From big-c with Apache License 2.0

@Test
public void testCreateWithCustomChecksum() throws Exception {
  Configuration conf = getTestConfiguration();
  MiniDFSCluster cluster = null;
  Path testBasePath = new Path("/test/csum");
  // create args
  Path path1 = new Path(testBasePath, "file_wtih_crc1");
  Path path2 = new Path(testBasePath, "file_with_crc2");
  ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
  ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

  // common args
  FsPermission perm = FsPermission.getDefault().applyUMask(
      FsPermission.getUMask(conf));
  EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
      CreateFlag.CREATE);
  short repl = 1;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    FileSystem dfs = cluster.getFileSystem();

    dfs.mkdirs(testBasePath);

    // create two files with different checksum types
    FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096,
        repl, 131072L, null, opt1);
    FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096,
        repl, 131072L, null, opt2);

    for (int i = 0; i < 1024; i++) {
      out1.write(i);
      out2.write(i);
    }
    out1.close();
    out2.close();

    // the two checksums must be different.
    MD5MD5CRC32FileChecksum sum1 =
        (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path1);
    MD5MD5CRC32FileChecksum sum2 =
        (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path2);
    assertFalse(sum1.equals(sum2));

    // check the individual params
    assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
    assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
  } finally {
    if (cluster != null) {
      cluster.getFileSystem().delete(testBasePath, true);
      cluster.shutdown();
    }
  }
}
Example 20
Source File: TestFileAppend4.java From RDFS with Apache License 2.0

void replicationTest(int badDN) throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int) BLOCK_SIZE / 2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE % 4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int) BLOCK_SIZE * 2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int) BLOCK_SIZE / 4);
    stm.sync();
    assertNumCurrentReplicas((short) (rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shut down
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");
    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();
    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for fs.replication.min replicas, and
    // it may take some millis before the other DN reports the block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE * 3 / 4);
    checkFile(fs1, BLOCK_SIZE * 3 / 4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}