org.apache.hadoop.fs.FileSystem#getLocal

Source File: TestMerge.java From aliyun-maxcompute-data-collectors with Apache License 2.0

6 votes

/**
 * Looks through the import output in {@code LOCAL_WAREHOUSE_DIR/dirName} and
 * reports whether any file whose name starts with "part-" contains the given
 * record, as decided by checkFileForLine.
 *
 * The test is failed outright if the directory listing is null or empty.
 *
 * @param record the record handed to checkFileForLine for matching
 * @param dirName directory name, resolved relative to LOCAL_WAREHOUSE_DIR
 * @param fileLayout the file layout handed to checkFileForLine
 * @return true as soon as one part file matches, false if none does
 * @throws Exception if the local file system cannot be obtained or read
 */
protected boolean recordStartsWith(List<Integer> record, String dirName,
    SqoopOptions.FileLayout fileLayout)
    throws Exception {
  Path importDir = new Path(new Path(LOCAL_WAREHOUSE_DIR), dirName);
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  FileStatus[] listing = localFs.listStatus(importDir);

  if (listing == null || listing.length == 0) {
    fail("Got no import files!");
  }

  for (int i = 0; i < listing.length; i++) {
    Path candidate = listing[i].getPath();
    // Only the part-* files are inspected; anything else is skipped.
    if (!candidate.getName().startsWith("part-")) {
      continue;
    }
    if (checkFileForLine(localFs, candidate, fileLayout, record)) {
      // First match wins; no need to look at the remaining files.
      return true;
    }
  }

  return false;
}

Source File: TestMerge.java From big-c with Apache License 2.0

6 votes

/**
 * Concatenates the in-memory partition buffers held in outStreams into a
 * single file on the raw local file system and then writes the matching
 * spill index.
 *
 * For each partition the start offset, the raw buffer length and the number
 * of bytes actually written to the output are recorded in the spill record.
 * The output stream is closed even when writing a partition fails.
 *
 * @param mapOutputPath destination of the concatenated output
 *                      (overwritten if it exists)
 * @param indexPath destination of the spill index
 * @throws IOException if creating or writing either file fails
 */
private void copyPartitions(Path mapOutputPath, Path indexPath)
  throws IOException {
  FileSystem localFs = FileSystem.getLocal(jobConf);
  FileSystem rfs = ((LocalFileSystem)localFs).getRaw();
  SpillRecord spillRecord = new SpillRecord(numberOfPartitions);
  IndexRecord indexRecord = new IndexRecord();
  FSDataOutputStream rawOutput = rfs.create(mapOutputPath, true, BUF_SIZE);
  try {
    for (int i = 0; i < numberOfPartitions; i++) {
      indexRecord.startOffset = rawOutput.getPos();
      byte buffer[] = outStreams[i].toByteArray();
      // The wrapper is only finished, never closed: rawOutput is shared by
      // all partitions and has to stay open until the loop is done.
      IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput);
      checksumOutput.write(buffer);
      // Write checksum.
      checksumOutput.finish();
      // Write index record
      indexRecord.rawLength = (long)buffer.length;
      indexRecord.partLength = rawOutput.getPos() - indexRecord.startOffset;
      spillRecord.putIndex(indexRecord, i);
      reporter.progress();
    }
  } finally {
    // Release the file handle whether or not every partition was written.
    rawOutput.close();
  }
  spillRecord.writeToFile(indexPath, jobConf);
}

Source File: TestBloomMapFile.java From hadoop with Apache License 2.0

5 votes

/**
 * Gives each test a fresh, empty TEST_ROOT on the local file system.
 * Fails the test if an existing TEST_ROOT cannot be deleted.
 */
@Override
public void setUp() throws Exception {
  final LocalFileSystem localFs = FileSystem.getLocal(conf);
  if (localFs.exists(TEST_ROOT)) {
    final boolean removed = localFs.delete(TEST_ROOT, true);
    if (!removed) {
      Assert.fail("Can't clean up test root dir");
    }
  }
  localFs.mkdirs(TEST_ROOT);
}

Source File: TestViewFileSystemWithAuthorityLocalFileSystem.java From big-c with Apache License 2.0

5 votes

/**
 * Points the target file system at the local file system, runs the parent
 * setup, and then replaces the view with one obtained for the URI
 * viewfs://default/.
 */
@Override
@Before
public void setUp() throws Exception {
  // The target must be in place before the parent setup runs.
  final Configuration localConf = new Configuration();
  fsTarget = FileSystem.getLocal(localConf);
  super.setUp(); // this sets up conf (and fcView which we replace)

  // A viewfs backed by the mount table named "default",
  // i.e. viewfs://default/
  final URI viewFsUri =
    new URI(FsConstants.VIEWFS_SCHEME, "default", "/", null, null);
  schemeWithAuthority = viewFsUri;
  fsView = FileSystem.get(viewFsUri, conf);
}

Source File: TestLineRecordReaderJobs.java From big-c with Apache License 2.0

5 votes

/**
 * Writes the input test file "test.txt" into inputDir on the local file
 * system. The file holds four lines separated by '\n', with no trailing
 * newline after the last one.
 *
 * @param conf configuration used to obtain the local file system
 * @throws IOException if the file cannot be created or written
 */
public void createInputFile(Configuration conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(inputDir, "test.txt");
  // Name the charset explicitly so the bytes on disk do not depend on the
  // platform default encoding.
  Writer writer = new OutputStreamWriter(localFs.create(file), "UTF-8");
  try {
    writer.write("abc\ndef\t\nghi\njkl");
  } finally {
    // Close even if the write fails so the underlying stream is released.
    writer.close();
  }
}

Source File: TestHistograms.java From big-c with Apache License 2.0

5 votes

/**
 * Checks histogram-derived CDFs against their recorded gold results.
 *
 * The test data is read from the local file system, from the directory
 * ${test.tools.input.dir}/rumen/histogram-tests .
 *
 * Files are expected in pairs, inputXxx.json and goldXxx.json . Each
 * inputXxx.json is turned into a LoggedDiscreteCDF by histogramFileToCDF;
 * the corresponding goldXxx.json is then parsed as a LoggedDiscreteCDF and
 * the two are deepCompare'd. Files whose name does not start with "input"
 * are ignored by the loop.
 *
 * @throws IOException if listing the directory or reading a file fails
 */
@Test
public void testHistograms() throws IOException {
  final Configuration conf = new Configuration();
  final FileSystem lfs = FileSystem.getLocal(conf);
  final Path rootInputDir = new Path(
      System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
  final Path rootInputFile = new Path(rootInputDir, "rumen/histogram-tests");

  FileStatus[] tests = lfs.listStatus(rootInputFile);

  for (FileStatus test : tests) {
    Path filePath = test.getPath();
    String fileName = filePath.getName();
    if (!fileName.startsWith("input")) {
      continue;
    }

    // The gold file shares the suffix that follows the "input" prefix.
    String testName = fileName.substring("input".length());
    Path goldFilePath = new Path(rootInputFile, "gold" + testName);
    assertTrue("Gold file does not exist", lfs.exists(goldFilePath));
    LoggedDiscreteCDF newResult = histogramFileToCDF(filePath, lfs);
    System.out.println("Testing a Histogram for " + fileName);
    FSDataInputStream goldStream = lfs.open(goldFilePath);
    JsonObjectMapperParser<LoggedDiscreteCDF> parser =
        new JsonObjectMapperParser<LoggedDiscreteCDF>(
            goldStream, LoggedDiscreteCDF.class);
    try {
      LoggedDiscreteCDF dcdf = parser.getNext();
      dcdf.deepCompare(newResult, new TreePath(null, "<root>"));
    } catch (DeepInequalityException e) {
      // Report where in the compared structure the two CDFs diverge.
      fail(e.path.toString());
    } finally {
      parser.close();
    }
  }
}

Source File: CommitSequenceTest.java From incubator-gobblin with Apache License 2.0

5 votes

/**
 * Prepares the fixture on the local file system: wipes ROOT_DIR, creates
 * dir1/file1 and dir2/file2, and builds a commit sequence of two rename
 * steps (file1 into dir2, file2 into dir1) followed by a dataset state
 * commit step.
 */
@BeforeClass
public void setUp() throws IOException {
  this.fs = FileSystem.getLocal(new Configuration());

  // Start from an empty root.
  this.fs.delete(new Path(ROOT_DIR), true);

  final Path stateStoreRoot = new Path(ROOT_DIR, "store");
  final Path firstDir = new Path(ROOT_DIR, "dir1");
  final Path secondDir = new Path(ROOT_DIR, "dir2");
  this.fs.mkdirs(firstDir);
  this.fs.mkdirs(secondDir);

  // Each source file is renamed into the other directory by the sequence.
  final Path firstSource = new Path(firstDir, "file1");
  final Path secondSource = new Path(secondDir, "file2");
  final Path firstTarget = new Path(secondDir, "file1");
  final Path secondTarget = new Path(firstDir, "file2");
  this.fs.createNewFile(firstSource);
  this.fs.createNewFile(secondSource);

  final DatasetState datasetState = new DatasetState("job-name", "job-id");
  datasetState.setDatasetUrn("urn");
  datasetState.setNoJobFailure();

  final State stepProps = new State();
  stepProps.setProp(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, stateStoreRoot.toString());

  this.sequence = new CommitSequence.Builder()
      .withJobName("testjob")
      .withDatasetUrn("testurn")
      .beginStep(FsRenameCommitStep.Builder.class)
          .from(firstSource).to(firstTarget).withProps(stepProps)
      .endStep()
      .beginStep(FsRenameCommitStep.Builder.class)
          .from(secondSource).to(secondTarget).withProps(stepProps)
      .endStep()
      .beginStep(DatasetStateCommitStep.Builder.class)
          .withDatasetUrn("urn").withDatasetState(datasetState).withProps(stepProps)
      .endStep()
      .build();
}

Source File: LoopingDatasetFinderSourceTest.java From incubator-gobblin with Apache License 2.0

5 votes

/**
 * Removes the test state store root directory (recursively) from the local
 * file system if it exists.
 */
@AfterClass
public void tearDown()
    throws IOException {
  final FileSystem localFs = FileSystem.getLocal(new Configuration(false));
  final Path stateStoreRoot = new Path(TEST_STATE_STORE_ROOT_DIR);
  if (!localFs.exists(stateStoreRoot)) {
    return;
  }
  localFs.delete(stateStoreRoot, true);
}

Source File: TestAvroStorage.java From spork with Apache License 2.0

5 votes

/**
 * Verifies the Avro files found two levels below outPath on the local file
 * system: every file must carry the expected "avro.codec" metadata value,
 * every record must be contained in the expected set, and the number of
 * records in each file must equal the size of the expected set.
 *
 * Each file's stream is closed even when an assertion or a read fails.
 *
 * @param outPath output directory whose sub-directories hold the files
 * @param expectedOutpath location passed to getExpected to load the
 *                        expected records
 * @param expectedCodec expected value of the "avro.codec" metadata entry
 * @throws IOException if listing or reading the output fails
 */
private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {

    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in expected results */
    Set<Object> expected = getExpected(expectedOutpath);

    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exists!", fs.exists(output)
            && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(),
                files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

            DataFileStream<Object> in = new DataFileStream<Object>(
                    fs.open(filePath), reader);
            int count = 0;
            try {
                assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
                while (in.hasNext()) {
                    Object obj = in.next();
                    assertTrue("Avro result object found that's not expected: " + obj, expected.contains(obj));
                    count++;
                }
            } finally {
                // A failed assertion must not leave the file handle open.
                in.close();
            }
            assertEquals(expected.size(), count);
        }
    }
}

Source File: NMLeveldbStateStoreService.java From hadoop with Apache License 2.0

5 votes

/**
 * Resolves the state store directory (DB_NAME under the configured
 * recovery directory) and creates it on the local file system with
 * permission 0700.
 *
 * @param conf configuration providing YarnConfiguration.NM_RECOVERY_DIR
 * @return the path of the store directory
 * @throws IOException if the recovery directory is not configured, or the
 *                     file system call fails
 */
private Path createStorageDir(Configuration conf) throws IOException {
  final String recoveryDir = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
  if (null == recoveryDir) {
    throw new IOException("No store location directory configured in " +
        YarnConfiguration.NM_RECOVERY_DIR);
  }

  final Path dbRoot = new Path(recoveryDir, DB_NAME);
  final FileSystem localFs = FileSystem.getLocal(conf);
  // 0700: access for the owner only.
  final FsPermission ownerOnly = new FsPermission((short)0700);
  localFs.mkdirs(dbRoot, ownerOnly);
  return dbRoot;
}

Source File: TestDelegationTokenRemoteFetcher.java From hadoop with Apache License 2.0

5 votes

/**
 * Creates a fresh configuration and local file system, picks a free port,
 * and builds the service URL and the test token for it.
 */
@Before
public void init() throws Exception {
  final Configuration freshConf = new Configuration();
  conf = freshConf;
  fileSys = FileSystem.getLocal(freshConf);

  // The token is created for http://localhost:<free port>.
  httpPort = NetUtils.getFreeSocketPort();
  final URI localService = new URI("http://localhost:" + httpPort);
  serviceUrl = localService;
  testToken = createToken(localService);
}

Source File: TestMROutput.java From tez with Apache License 2.0

5 votes

/**
 * Builds a LogicalIOProcessorRuntimeTask that runs TestProcessor with no
 * inputs and a single MROutput named "Null" configured with
 * TestOutputFormat. The task's only local directory is "TestMapOutput"
 * under the "test.build.data" system property (default "/tmp"), qualified
 * against the local file system.
 *
 * @param conf configuration used for the output and for the task
 * @param umbilical umbilical handed to the task
 * @param dagName DAG name recorded in the task spec
 * @param vertexName vertex name recorded in the task spec
 * @param sharedExecutor executor handed to the task
 * @return the newly constructed task
 * @throws Exception if the local file system or the task cannot be set up
 */
public static LogicalIOProcessorRuntimeTask createLogicalTask(
    Configuration conf,
    TezUmbilical umbilical, String dagName,
    String vertexName, TezExecutors sharedExecutor) throws Exception {
  ProcessorDescriptor processor = ProcessorDescriptor.create(TestProcessor.class.getName());

  // No inputs; exactly one output.
  List<InputSpec> inputs = Lists.newLinkedList();
  List<OutputSpec> outputs = Lists.newLinkedList();
  OutputSpec nullOutput = new OutputSpec("Null",
      MROutput.createConfigBuilder(conf, TestOutputFormat.class).build().getOutputDescriptor(), 1);
  outputs.add(nullOutput);

  TaskSpec spec = new TaskSpec(
      TezTestUtils.getMockTaskAttemptId(0, 0, 0, 0),
      dagName, vertexName, -1,
      processor,
      inputs,
      outputs, null, null);

  FileSystem localFs = FileSystem.getLocal(conf);
  Path buildData = new Path(System.getProperty("test.build.data", "/tmp"));
  Path workDir = new Path(buildData, "TestMapOutput")
      .makeQualified(localFs.getUri(), localFs.getWorkingDirectory());
  String[] localDirs = new String[] {workDir.toString()};

  return new LogicalIOProcessorRuntimeTask(
      spec,
      0,
      conf,
      localDirs,
      umbilical,
      null,
      new HashMap<String, String>(),
      HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"),
      Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
}

Source File: UpgradeUtilities.java From RDFS with Apache License 2.0

5 votes

/**
 * For every entry in parents, creates an empty directory via
 * createEmptyDirs and then copies namenodeStorage into it through the
 * local file system, keeping the source in place.
 *
 * @param parents directories to populate
 * @throws Exception if creating or copying a directory fails
 */
public static void createFederatedNameNodeStorageDirs(String[] parents) 
    throws Exception {
  LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
  for (String parent : parents) {
    File targetDir = new File(parent);
    String targetName = targetDir.toString();
    createEmptyDirs(new String[] {targetName});

    Path source = new Path(namenodeStorage.toString());
    Path destination = new Path(targetDir.toString());
    // false: do not delete the source after copying.
    localFs.copyToLocalFile(source, destination, false);
  }
}

Source File: IsolationRunner.java From RDFS with Apache License 2.0

4 votes

/**
 * Run a single task.
 *
 * The job configuration is loaded from the given job.xml; the task id,
 * map/reduce flag and partition are read from it. For a map task the
 * split is read from "split.dta" next to job.xml; for a reduce task
 * missing map outputs are filled in first. The process exits with status 1
 * if the argument count is wrong or the job file is not an existing file.
 *
 * @param args exactly one argument: the path of the task's job.xml file
 * @throws ClassNotFoundException propagated from setting up or running the task
 * @throws IOException if the job file, split file or local directories
 *                     cannot be read
 * @throws InterruptedException propagated from running the task
 */
public static void main(String[] args
                        ) throws ClassNotFoundException, IOException, 
                                 InterruptedException {
  if (args.length != 1) {
    System.out.println("Usage: IsolationRunner <path>/job.xml");
    System.exit(1);
  }
  File jobFilename = new File(args[0]);
  if (!jobFilename.exists() || !jobFilename.isFile()) {
    System.out.println(jobFilename + " is not a valid job file.");
    System.exit(1);
  }
  JobConf conf = new JobConf(new Path(jobFilename.toString()));
  TaskAttemptID taskId = TaskAttemptID.forName(conf.get("mapred.task.id"));
  boolean isMap = conf.getBoolean("mapred.task.is.map", true);
  int partition = conf.getInt("mapred.task.partition", 0);
  
  // setup the local and user working directories
  FileSystem local = FileSystem.getLocal(conf);
  LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
  File workDirName = new File(lDirAlloc.getLocalPathToRead(
                                TaskTracker.getLocalTaskDir(
                                  taskId.getJobID().toString(), 
                                  taskId.toString())
                                + Path.SEPARATOR + "work",
                                conf). toString());
  local.setWorkingDirectory(new Path(workDirName.toString()));
  FileSystem.get(conf).setWorkingDirectory(conf.getWorkingDirectory());
  
  // set up a classloader with the right classpath
  ClassLoader classLoader = makeClassLoader(conf, workDirName);
  Thread.currentThread().setContextClassLoader(classLoader);
  conf.setClassLoader(classLoader);
  
  Task task;
  if (isMap) {
    // The serialized split lives next to job.xml.
    Path localSplit = new Path(new Path(jobFilename.toString()).getParent(), 
                               "split.dta");
    String splitClass;
    BytesWritable split = new BytesWritable();
    DataInputStream splitFile = FileSystem.getLocal(conf).open(localSplit);
    try {
      // The file holds the split class name followed by the split bytes.
      splitClass = Text.readString(splitFile);
      split.readFields(splitFile);
    } finally {
      // Close even when the split file is truncated or unreadable.
      splitFile.close();
    }
    task = new MapTask(jobFilename.toString(), taskId, partition, 
                       splitClass, split, 1, conf.getUser());
  } else {
    int numMaps = conf.getNumMapTasks();
    fillInMissingMapOutputs(local, taskId, numMaps, conf);
    task = new ReduceTask(jobFilename.toString(), taskId, partition, numMaps, 
                          1, conf.getUser());
  }
  task.setConf(conf);
  task.run(conf, new FakeUmbilical());
}

Source File: TestMerger.java From big-c with Apache License 2.0

4 votes

/**
 * Creates a fresh Configuration and JobConf for each test and obtains the
 * local file system from the new Configuration.
 */
@Before
public void setup() throws IOException {
  final Configuration freshConf = new Configuration();
  conf = freshConf;
  jobConf = new JobConf();
  // The file system is built from conf, not from jobConf.
  fs = FileSystem.getLocal(freshConf);
}

Source File: BenchmarkThroughput.java From hadoop-gpu with Apache License 2.0

4 votes

/**
 * Runs the throughput benchmark: writes and reads a file through the plain
 * local path, the raw local file system, the checksummed local file system
 * and finally a single-node MiniDFSCluster.
 *
 * File size and buffer size come from "dfsthroughput.file.size" and
 * "dfsthroughput.buffer.size"; the local directory comes from
 * "mapred.temp.dir".
 *
 * @param args optionally one argument, the number of repetitions
 *             (defaults to 1)
 * @return 0 on success, -1 if the arguments are invalid
 * @throws IOException if any of the file system operations fails
 */
public int run(String[] args) throws IOException {
  // silence the minidfs cluster
  Log orgLog = LogFactory.getLog("org");
  if (orgLog instanceof Log4JLogger) {
    Log4JLogger log4j = (Log4JLogger) orgLog;
    log4j.getLogger().setLevel(Level.WARN);
  }

  // At most one argument is accepted: the repetition count.
  if (args.length > 1) {
    printUsage();
    return -1;
  }
  int reps = 1;
  if (args.length == 1) {
    try {
      reps = Integer.parseInt(args[0]);
    } catch (NumberFormatException e) {
      printUsage();
      return -1;
    }
  }

  Configuration conf = getConf();
  // the size of the file to write
  long fileSize = conf.getLong("dfsthroughput.file.size",
      10L * 1024 * 1024 * 1024);
  BUFFER_SIZE = conf.getInt("dfsthroughput.buffer.size", 4 * 1024);

  String localDir = conf.get("mapred.temp.dir");
  dir = new LocalDirAllocator("mapred.temp.dir");

  System.setProperty("test.build.data", localDir);
  System.out.println("Local = " + localDir);
  ChecksumFileSystem checkedLocal = FileSystem.getLocal(conf);
  FileSystem rawLocal = checkedLocal.getRawFileSystem();
  for (int rep = 0; rep < reps; rep++) {
    writeAndReadLocalFile("local", conf, fileSize);
    writeAndReadFile(rawLocal, "raw", conf, fileSize);
    writeAndReadFile(checkedLocal, "checked", conf, fileSize);
  }

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster(conf, 1, true, new String[]{"/foo"});
    cluster.waitActive();
    FileSystem dfs = cluster.getFileSystem();
    for (int rep = 0; rep < reps; rep++) {
      writeAndReadFile(dfs, "dfs", conf, fileSize);
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
      // clean up minidfs junk
      rawLocal.delete(new Path(localDir, "dfs"), true);
    }
  }
  return 0;
}

Source File: TestInputFormatColumnProjection.java From parquet-mr with Apache License 2.0

4 votes

/**
 * Writes FILE_CONTENT to Parquet with a "write" job, sums the lengths of
 * the files that job produced, then reads them back with a "read" job that
 * uses a single-column projection schema, and asserts that the bytes
 * counted by Reader.bytesReadCounter are less than a tenth of the bytes
 * written.
 *
 * The test is skipped unless org.apache.hadoop.mapreduce.JobContext is an
 * interface.
 */
@Test
public void testProjectionSize() throws Exception {
  Assume.assumeTrue( // only run this test for Hadoop 2
      org.apache.hadoop.mapreduce.JobContext.class.isInterface());

  // Text input for the write job.
  File inputFile = temp.newFile();
  FileOutputStream out = new FileOutputStream(inputFile);
  out.write(FILE_CONTENT.getBytes("UTF-8"));
  out.close();

  // Reserve a name, then delete it: only the path is needed here.
  // NOTE(review): presumably the write job requires that its output path
  // does not exist yet - confirm.
  File tempFolder = temp.newFolder();
  tempFolder.delete();
  Path tempPath = new Path(tempFolder.toURI());

  // Same reserve-then-delete approach for the read job's output location.
  File outputFolder = temp.newFile();
  outputFolder.delete();

  Configuration conf = new Configuration();
  // set the projection schema
  conf.set("parquet.read.schema", Types.buildMessage()
      .required(BINARY).as(UTF8).named("char")
      .named("FormatTestObject").toString());

  // disable summary metadata, it isn't needed
  conf.set("parquet.enable.summary-metadata", "false");
  conf.set("parquet.example.schema", PARQUET_TYPE.toString());

  // Phase 1: map-only job that converts the text input into Parquet files
  // under tempPath.
  {
    Job writeJob = new Job(conf, "write");
    writeJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(writeJob, new Path(inputFile.toString()));

    writeJob.setOutputFormatClass(ExampleOutputFormat.class);
    writeJob.setMapperClass(Writer.class);
    writeJob.setNumReduceTasks(0); // write directly to Parquet without reduce
    ParquetOutputFormat.setBlockSize(writeJob, 10240);
    ParquetOutputFormat.setPageSize(writeJob, 512);
    ParquetOutputFormat.setDictionaryPageSize(writeJob, 1024);
    ParquetOutputFormat.setEnableDictionary(writeJob, true);
    ParquetOutputFormat.setMaxPaddingSize(writeJob, 1023); // always pad
    ParquetOutputFormat.setOutputPath(writeJob, tempPath);

    waitForJob(writeJob);
  }

  // Total size of everything listed directly under the write job's output
  // directory.
  long bytesWritten = 0;
  FileSystem fs = FileSystem.getLocal(conf);
  for (FileStatus file : fs.listStatus(tempPath)) {
    bytesWritten += file.getLen();
  }

  // Phase 2: map-only job that reads the Parquet files back; the number of
  // bytes read is taken from the Reader mapper's counter afterwards.
  long bytesRead;
  {
    Job readJob = new Job(conf, "read");
    readJob.setInputFormatClass(ExampleInputFormat.class);
    TextInputFormat.addInputPath(readJob, tempPath);

    readJob.setOutputFormatClass(TextOutputFormat.class);
    readJob.setMapperClass(Reader.class);
    readJob.setNumReduceTasks(0); // no reduce phase
    TextOutputFormat.setOutputPath(readJob, new Path(outputFolder.toString()));

    waitForJob(readJob);

    bytesRead = Reader.bytesReadCounter.getValue();
  }

  // Integer division: the threshold is bytesWritten / 10 rounded down.
  Assert.assertTrue("Should read less than 10% of the input file size",
      bytesRead < (bytesWritten / 10));
}

Source File: MockRemoteDirectoryManager.java From submarine with Apache License 2.0

4 votes

/**
 * Returns the local file system obtained from a newly created
 * Configuration.
 */
@Override
public FileSystem getDefaultFileSystem() throws IOException {
  final Configuration defaults = new Configuration();
  return FileSystem.getLocal(defaults);
}

Source File: AvroTestToolsTest.java From incubator-gobblin with Apache License 2.0

4 votes

/**
 * Writes the records of the JSON resource "avroWriterTest" in binary form
 * to a "test" directory under a fresh temp dir on the local file system,
 * reads them back, and asserts that checkSameFilesAndRecords accepts the
 * pair.
 */
@Test
public void test() throws Exception {
  DataTestTools tools = new AvroTestTools();

  String resource = "avroWriterTest";

  File scratchDir = Files.createTempDir();

  FileSystem localFs = FileSystem.getLocal(new Configuration());
  Path binaryDir = new Path(scratchDir.getAbsolutePath(), "test");

  tools.writeJsonResourceRecordsAsBinary(resource, localFs, binaryDir, null);

  // Expected: records straight from the resource. Actual: what was written.
  boolean sameContent = tools.checkSameFilesAndRecords(
      tools.readAllRecordsInJsonResource(resource, null),
      tools.readAllRecordsInBinaryDirectory(localFs, binaryDir), false, null, true);
  Assert.assertTrue(sameContent);
}

Source File: LocalFSContract.java From hadoop with Apache License 2.0

2 votes

/**
 * Obtain the local filesystem for the configuration returned by
 * getConf(). Subclasses may override this.
 * @return the local filesystem
 * @throws IOException if the filesystem cannot be obtained
 */
protected FileSystem getLocalFS() throws IOException {
  final FileSystem localFs = FileSystem.getLocal(getConf());
  return localFs;
}

Java Code Examples for org.apache.hadoop.fs.FileSystem#getLocal()