org.apache.hadoop.mapreduce.JobContext Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.JobContext.
Each example is taken from an open-source project; the source file, the project it comes from, and its license are noted above each snippet.
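Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how a JobContext is typically constructed and queried. It assumes Hadoop 2.x, where the JobContextImpl implementation lives in org.apache.hadoop.mapreduce.task; the configuration key example.num.splits is hypothetical and used only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobContextDemo {
  public static void main(String[] args) {
    // JobContext is a read-only view of a job: its Configuration, JobID,
    // credentials, and the configured input/output format classes.
    Configuration conf = new Configuration();
    conf.setInt("example.num.splits", 4); // hypothetical key, for illustration only

    // Several examples below build a context the same way when the framework
    // does not hand one to them.
    JobContext jobContext = new JobContextImpl(conf, new JobID("demo", 1));

    int numSplits = jobContext.getConfiguration().getInt("example.num.splits", 1);
    System.out.println("job id = " + jobContext.getJobID()
        + ", requested splits = " + numSplits);
  }
}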
Example #1
Source File: HadoopInputFormatBase.java From flink with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }

  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }

  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #2
Source File: DatasetKeyOutputFormat.java From kite with Apache License 2.0 | 6 votes |
/**
 * The job dataset may already exist if the ApplicationMaster was restarted
 */
@SuppressWarnings("unchecked")
private static <E> Dataset<E> loadOrCreateJobDataset(JobContext jobContext) {
  Dataset<Object> dataset = load(jobContext).getDataset();
  String jobDatasetName = getJobDatasetName(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  if (repo.exists(TEMP_NAMESPACE, jobDatasetName)) {
    Dataset<E> tempDataset = repo.load(TEMP_NAMESPACE, jobDatasetName,
        DatasetKeyOutputFormat.<E>getType(jobContext));
    try {
      Compatibility.checkCompatible(dataset.getDescriptor(),
          tempDataset.getDescriptor());
      return tempDataset;
    } catch (RuntimeException ex) {
      // swallow
    }
  }

  return repo.create(TEMP_NAMESPACE, jobDatasetName,
      copy(dataset.getDescriptor()),
      DatasetKeyOutputFormat.<E>getType(jobContext));
}
Example #3
Source File: PrunedSequenceFileInputFormat.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = super.getSplits(job);
  List<InputSplit> results = new ArrayList<InputSplit>();
  Configuration configuration = job.getConfiguration();
  String table = InputSplitPruneUtil.getTable(configuration);
  for (InputSplit inputSplit : splits) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    LOG.debug("Getting shard index from path [" + path + "]");
    String name = path.getName();
    int shard = getShardIndex(name);
    long rowIdUpdateFromNewDataCount = InputSplitPruneUtil.getBlurLookupRowIdUpdateFromNewDataCount(
        configuration, table, shard);
    long indexCount = InputSplitPruneUtil.getBlurLookupRowIdFromIndexCount(configuration, table, shard);
    if (rowIdUpdateFromNewDataCount == 0 || indexCount == 0) {
      LOG.debug("Pruning id lookup input path [" + path + "] no overlapping ids.");
    } else if (InputSplitPruneUtil.shouldLookupExecuteOnShard(configuration, table, shard)) {
      LOG.debug("Keeping id lookup input path [" + path + "]");
      results.add(inputSplit);
    } else {
      LOG.debug("Pruning id lookup input path [" + path + "]");
    }
  }
  return results;
}
Example #4
Source File: GenerateData.java From RDFS with Apache License 2.0 | 6 votes |
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
      new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
      jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
Example #5
Source File: GeoWaveInputFormat.java From geowave with Apache License 2.0 | 6 votes |
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 *
 * @param context the Hadoop context for the configured job
 * @throws IOException if the context is improperly configured
 * @since 1.5.0
 */
protected static void validateOptions(final JobContext context) throws IOException {
  // attempt to get each of the GeoWave stores from the job context
  try {
    final Map<String, String> configOptions = getStoreOptionsMap(context);
    final StoreFactoryFamilySpi factoryFamily = GeoWaveStoreFinder.findStoreFamily(configOptions);
    if (factoryFamily == null) {
      final String msg = "Unable to find GeoWave data store";
      LOGGER.warn(msg);
      throw new IOException(msg);
    }
  } catch (final Exception e) {
    LOGGER.warn("Error finding GeoWave stores", e);
    throw new IOException("Error finding GeoWave stores", e);
  }
}
Example #6
Source File: TestMRCJCFileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testLastInputSplitSingleSplit() throws Exception {
  FileInputFormat fif = new FileInputFormatForTest(100l * 1024 * 1024,
      128l * 1024 * 1024);
  Configuration conf = new Configuration();
  JobContext jobContext = mock(JobContext.class);
  when(jobContext.getConfiguration()).thenReturn(conf);
  List<InputSplit> splits = fif.getSplits(jobContext);
  assertEquals(1, splits.size());
  for (int i = 0; i < splits.size(); i++) {
    InputSplit split = splits.get(i);
    assertEquals(("host" + i), split.getLocations()[0]);
  }
}
Example #7
Source File: NMapInputFormat.java From HBase-ToHDFS with Apache License 2.0 | 5 votes |
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException,
    InterruptedException {
  int count = getNumMapTasks(context.getConfiguration());
  List<InputSplit> splits = new ArrayList<InputSplit>(count);
  for (int i = 0; i < count; i++) {
    splits.add(new NullInputSplit());
  }
  return splits;
}
Example #8
Source File: GeoWaveConfiguratorBase.java From geowave with Apache License 2.0 | 5 votes |
public static final <T> T getInstance(
    final Class<?> implementingClass,
    final Enum<?> e,
    final JobContext context,
    final Class<T> interfaceClass,
    final Class<? extends T> defaultClass)
    throws InstantiationException, IllegalAccessException {
  return getConfiguration(context).getClass(
      enumToConfKey(implementingClass, e),
      defaultClass,
      interfaceClass).newInstance();
}
Example #9
Source File: SequenceFileAsBinaryOutputFormat.java From hadoop with Apache License 2.0 | 5 votes |
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
  super.checkOutputSpecs(job);
  if (getCompressOutput(job) &&
      getOutputCompressionType(job) == CompressionType.RECORD) {
    throw new InvalidJobConfException("SequenceFileAsBinaryOutputFormat "
        + "doesn't support Record Compression");
  }
}
Example #10
Source File: HadoopOutputFormatTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testOpen() throws Exception {
  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  OutputCommitter outputCommitter = setupOutputCommitter(true);
  when(dummyOutputFormat.getOutputCommitter(any(TaskAttemptContext.class))).thenReturn(outputCommitter);

  HadoopOutputFormat<String, Long> hadoopOutputFormat = setupHadoopOutputFormat(
      dummyOutputFormat, Job.getInstance(), new DummyRecordWriter(),
      setupOutputCommitter(true), new Configuration());

  hadoopOutputFormat.open(1, 4);

  verify(hadoopOutputFormat.outputCommitter, times(1)).setupJob(any(JobContext.class));
  verify(hadoopOutputFormat.mapreduceOutputFormat, times(1)).getRecordWriter(any(TaskAttemptContext.class));
}
Example #11
Source File: FileAndDirectoryInputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = simpleListStatus(job, dirs, inputFilter, recursive);

  LOG.info("Total input paths to process : " + result.size());
  return result;
}
Example #12
Source File: AccumuloHDFSFileInputFormat.java From rya with Apache License 2.0 | 5 votes |
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
  // read the params from AccumuloInputFormat
  Configuration conf = jobContext.getConfiguration();
  Instance instance = MRUtils.AccumuloProps.getInstance(jobContext);
  String user = MRUtils.AccumuloProps.getUsername(jobContext);
  AuthenticationToken password = MRUtils.AccumuloProps.getPassword(jobContext);
  String table = MRUtils.AccumuloProps.getTablename(jobContext);
  ArgumentChecker.notNull(instance);
  ArgumentChecker.notNull(table);

  // find the files necessary
  try {
    Connector connector = instance.getConnector(user, password);
    TableOperations tos = connector.tableOperations();
    String tableId = tos.tableIdMap().get(table);
    Scanner scanner = connector.createScanner("accumulo.metadata", Authorizations.EMPTY); //TODO: auths?
    scanner.setRange(new Range(new Text(tableId + "\u0000"), new Text(tableId + "\uFFFD")));
    scanner.fetchColumnFamily(new Text("file"));
    List<String> files = new ArrayList<String>();
    List<InputSplit> fileSplits = new ArrayList<InputSplit>();
    for (Map.Entry<Key, Value> entry : scanner) {
      String file = entry.getKey().getColumnQualifier().toString();
      Path path = new Path(file);
      FileSystem fs = path.getFileSystem(conf);
      FileStatus fileStatus = fs.getFileStatus(path);
      long len = fileStatus.getLen();
      BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, len);
      files.add(file);
      fileSplits.add(new FileSplit(path, 0, len, fileBlockLocations[0].getHosts()));
    }
    System.out.println(files);
    return fileSplits;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Example #13
Source File: DatasetKeyOutputFormat.java From kite with Apache License 2.0 | 5 votes |
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo = DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (repo instanceof TemporaryDatasetRepositoryAccessor) {
    Dataset<Object> dataset = load(jobContext).getDataset();
    String namespace = dataset.getNamespace();
    repo = ((TemporaryDatasetRepositoryAccessor) repo)
        .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
  }
  return repo;
}
Example #14
Source File: S3DirectoryOutputCommitter.java From s3committer with Apache License 2.0 | 5 votes |
@Override
public void commitJob(JobContext context) throws IOException {
  Path outputPath = getOutputPath(context);
  // use the FS implementation because it will check for _$folder$
  FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
  if (fs.exists(outputPath)) {
    switch (getMode(context)) {
      case FAIL:
        // this was checked in setupJob, but this avoids some cases where
        // output was created while the job was processing
        throw new AlreadyExistsException(
            "Output path already exists: " + outputPath);
      case APPEND:
        // do nothing
        break;
      case REPLACE:
        LOG.info("Removing output path to be replaced: " + outputPath);
        if (!fs.delete(outputPath, true /* recursive */)) {
          throw new IOException(
              "Failed to delete existing output directory for replace:" + outputPath);
        }
        break;
      default:
        throw new RuntimeException(
            "Unknown conflict resolution mode: " + getMode(context));
    }
  }

  super.commitJob(context);
}
Example #15
Source File: FileOutputCommitter.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Delete the temporary directory, including all of the work directories.
 * @param context the job's context
 */
@Override
public void abortJob(JobContext context, JobStatus.State state)
    throws IOException {
  // delete the _temporary folder
  cleanupJob(context);
}
Example #16
Source File: TestJobImpl.java From big-c with Apache License 2.0 | 5 votes |
@Test(timeout=20000)
public void testKilledDuringKillAbort() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  OutputCommitter committer = new StubbedOutputCommitter() {
    @Override
    public synchronized void abortJob(JobContext jobContext, State state)
        throws IOException {
      while (!Thread.interrupted()) {
        try {
          wait();
        } catch (InterruptedException e) {
        }
      }
    }
  };
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
  JobId jobId = job.getID();
  job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
  assertJobState(job, JobStateInternal.INITED);
  job.handle(new JobStartEvent(jobId));
  assertJobState(job, JobStateInternal.SETUP);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILL_ABORT);
  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
Example #17
Source File: IcebergPigInputFormat.java From iceberg with Apache License 2.0 | 5 votes |
@Override
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext context) throws IOException {
  if (splits != null) {
    LOG.info("Returning cached splits: {}", splits.size());
    return splits;
  }

  splits = Lists.newArrayList();

  TableScan scan = table.newScan();

  // Apply Filters
  Expression filterExpression =
      (Expression) ObjectSerializer.deserialize(context.getConfiguration().get(scope(ICEBERG_FILTER_EXPRESSION)));
  LOG.info("[{}]: iceberg filter expressions: {}", signature, filterExpression);

  if (filterExpression != null) {
    LOG.info("Filter Expression: {}", filterExpression);
    scan = scan.filter(filterExpression);
  }

  // Wrap in Splits
  try (CloseableIterable<CombinedScanTask> tasks = scan.planTasks()) {
    tasks.forEach(scanTask -> splits.add(new IcebergSplit(scanTask)));
  }

  return splits;
}
Example #18
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0 | 5 votes |
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #19
Source File: RegionSplitsIT.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Test
public void testGetSplits() throws Exception {
  SMInputFormat smInputFormat = new SMInputFormat();
  final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
  conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
  conf.setInt(MRConstants.SPLICE_SPLITS_PER_TABLE, 8);
  // Get splits for the SYSCOLUMNS table.
  String tableName = format("%s.%s", SCHEMA_NAME, TABLE1_NAME);
  conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
  long conglomId = spliceClassWatcher.getConglomId(TABLE1_NAME, SCHEMA_NAME);
  String conglomAsString = format("%d", conglomId);
  conf.set(MRConstants.SPLICE_INPUT_CONGLOMERATE, conglomAsString);
  String jdbcString = "jdbc:splice://localhost:1527/splicedb;user=splice;password=admin";
  conf.set(MRConstants.SPLICE_JDBC_STR, jdbcString);

  SMSQLUtil util = SMSQLUtil.getInstance(jdbcString);
  List<String> columns = new ArrayList<>();
  columns.add("I");
  conf.set(MRConstants.SPLICE_SCAN_INFO,
      util.getTableScannerBuilder(tableName, columns).base64Encode());
  smInputFormat.setConf(conf);

  JobContext ctx = new JobContextImpl(conf, new JobID("test", 1));
  List<InputSplit> splits = smInputFormat.getSplits(ctx);

  LOG.info("Got " + splits.size() + " splits");
  assertTrue(format("Expected between 6 and 10 splits, got %d.", splits.size()),
      splits.size() >= 6 && splits.size() <= 10);
}
Example #20
Source File: DatasetKeyOutputFormat.java From kite with Apache License 2.0 | 5 votes |
@Override
@SuppressWarnings("unchecked")
public void commitJob(JobContext jobContext) throws IOException {
  Configuration conf = Hadoop.JobContext
      .getConfiguration.invoke(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  boolean isTemp = repo instanceof TemporaryDatasetRepository;

  String jobDatasetName = getJobDatasetName(jobContext);
  View<E> targetView = load(jobContext);
  Dataset<E> jobDataset = repo.load(TEMP_NAMESPACE, jobDatasetName);
  WriteMode mode = conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT);
  if (mode == WriteMode.OVERWRITE && canReplace(targetView)) {
    ((Replaceable<View<E>>) targetView.getDataset()).replace(targetView, jobDataset);
  } else {
    ((Mergeable<Dataset<E>>) targetView.getDataset()).merge(jobDataset);
  }

  if (targetView instanceof Signalable) {
    ((Signalable) targetView).signalReady();
  }

  if (isTemp) {
    ((TemporaryDatasetRepository) repo).delete();
  } else {
    repo.delete(TEMP_NAMESPACE, jobDatasetName);
  }
}
Example #21
Source File: PhoenixInputFormat.java From phoenix with Apache License 2.0 | 5 votes |
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  final Configuration configuration = context.getConfiguration();
  final QueryPlan queryPlan = getQueryPlan(context, configuration);
  final List<KeyRange> allSplits = queryPlan.getSplits();
  final List<InputSplit> splits = generateSplits(queryPlan, allSplits);
  return splits;
}
Example #22
Source File: InputRDDFormat.java From tinkerpop with Apache License 2.0 | 5 votes |
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
  return Collections.singletonList(new InputSplit() {
    @Override
    public long getLength() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return new String[0];
    }
  });
}
Example #23
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0 | 5 votes |
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #24
Source File: CombineFileInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
      new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  // Once we remove support for Hadoop < 2.0
  //return codec instanceof SplittableCompressionCodec;
  return false;
}
Example #25
Source File: YarnOutputFiles.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Create a local map output file name on the same volume.
 */
public Path getOutputFileForWriteInVolume(Path existing) {
  Path outputDir = new Path(existing.getParent(), JOB_OUTPUT_DIR);
  Path attemptOutputDir = new Path(outputDir,
      conf.get(JobContext.TASK_ATTEMPT_ID));
  return new Path(attemptOutputDir, MAP_OUTPUT_FILENAME_STRING);
}
Example #26
Source File: DistSum.java From hadoop with Apache License 2.0 | 5 votes |
/** @return a list containing a single split of summation */
@Override
public List<InputSplit> getSplits(JobContext context) {
  //read sigma from conf
  final Configuration conf = context.getConfiguration();
  final Summation sigma = SummationWritable.read(DistSum.class, conf);

  //create splits
  final List<InputSplit> splits = new ArrayList<InputSplit>(1);
  splits.add(new SummationSplit(sigma));
  return splits;
}
Example #27
Source File: SSTableInputFormat.java From hadoop-sstable with Apache License 2.0 | 5 votes |
@Override
public List<InputSplit> getSplits(final JobContext job) throws IOException {
  final Configuration configuration = job.getConfiguration();

  final List<InputSplit> result = Lists.newArrayList();

  final List<FileStatus> files = listStatus(job);

  LOG.debug("Initial file list: {} {}", files.size(), files);

  for (final FileStatus fileStatus : files) {
    final Path dataFile = fileStatus.getPath();
    final FileSystem fileSystem = dataFile.getFileSystem(configuration);
    final BlockLocation[] blockLocations =
        fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());

    // Data file, try to split if the .index file was found
    final SSTableIndexIndex index = indexes.get(dataFile);
    if (index == null) {
      throw new IOException("Index not found for " + dataFile);
    }

    for (final SSTableIndexIndex.Chunk chunk : index.getOffsets()) {
      // This isn't likely to work well because we are dealing with the index into uncompressed data...
      final int blockIndex = getBlockIndex(blockLocations,
          chunk.getStart() / COMPRESSION_RATIO_ASSUMPTION);
      final SSTableSplit split = new SSTableSplit(dataFile, chunk.getStart(), chunk.getEnd(),
          chunk.getEnd() - chunk.getStart(), blockLocations[blockIndex].getHosts());
      result.add(split);
    }
  }

  LOG.debug("Splits calculated: {} {}", result.size(), result);

  return result;
}
Example #28
Source File: AbstractSMInputFormat.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
private List<InputSplit> getInputSplitsFromCache(JobContext context) {
  if (inputSplits != null) {
    return inputSplits;
  }
  String splitCacheId = context.getConfiguration().get(MRConstants.SPLICE_SCAN_INPUT_SPLITS_ID);
  if (StringUtils.isNotEmpty(splitCacheId)) {
    if (FetchSplitsJob.splitCache.containsKey(splitCacheId)) {
      Future<List<InputSplit>> cachedSplitsFuture = FetchSplitsJob.splitCache.get(splitCacheId);
      List<InputSplit> cachedSplits = null;
      if (cachedSplitsFuture != null) {
        try {
          cachedSplits = cachedSplitsFuture.get();
        } catch (ExecutionException | InterruptedException e) {
          throw new RuntimeException(e.getMessage(), e);
        }
      }
      FetchSplitsJob.splitCache.remove(splitCacheId);
      if (cachedSplits != null) {
        inputSplits = cachedSplits;
        return cachedSplits;
      }
    }
  }
  return null;
}
Example #29
Source File: TestJobImpl.java From hadoop with Apache License 2.0 | 5 votes |
@Test (timeout=10000)
public void testFailAbortDoesntHang() throws IOException {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  conf.set(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, "1000");

  DrainDispatcher dispatcher = new DrainDispatcher();
  dispatcher.init(conf);
  dispatcher.start();

  OutputCommitter committer = Mockito.mock(OutputCommitter.class);
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  // Job has only 1 mapper task. No reducers
  conf.setInt(MRJobConfig.NUM_REDUCES, 0);
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
  JobImpl job = createRunningStubbedJob(conf, dispatcher, 1, null);

  // Fail / finish all the tasks. This should land the JobImpl directly in the
  // FAIL_ABORT state
  for (Task t : job.tasks.values()) {
    TaskImpl task = (TaskImpl) t;
    task.handle(new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE));
    for (TaskAttempt ta : task.getAttempts().values()) {
      task.handle(new TaskTAttemptEvent(ta.getID(),
          TaskEventType.T_ATTEMPT_FAILED));
    }
  }
  dispatcher.await();

  // Verify abortJob is called once and the job failed
  Mockito.verify(committer, Mockito.timeout(2000).times(1))
      .abortJob((JobContext) Mockito.any(), (State) Mockito.any());
  assertJobState(job, JobStateInternal.FAILED);
  dispatcher.stop();
  commitHandler.stop();
}