org.apache.hadoop.mapreduce.Mapper Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.Mapper.
The original project and source file are noted above each example.
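For orientation, here is a minimal, self-contained Mapper subclass in the usual word-count style. It is a sketch for reference only; the class and field names are illustrative and do not come from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/** Emits (token, 1) for every whitespace-separated token in each input line. */
public class TokenCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}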
Example #1
Source File: AbstractReasoningTool.java From rya with Apache License 2.0
/**
 * Set up the MapReduce job to use as inputs both an Accumulo table and the
 * files containing previously derived information. Looks for a file for
 * every iteration number so far, preferring final cleaned up output from
 * that iteration but falling back on intermediate data if necessary.
 * @param tableMapper Mapper class to use for database input
 * @param rdfMapper Mapper class to use for direct RDF input
 * @param fileMapper Mapper class to use for derived triples input
 * @param incMapper Mapper class to use for derived inconsistencies input
 * @param filter True to exclude previously derived data that couldn't be
 *        used to derive anything new at this point.
 */
protected void configureMultipleInput(
        Class<? extends Mapper<Key, Value, ?, ?>> tableMapper,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper,
        Class<? extends Mapper<Fact, NullWritable, ?, ?>> fileMapper,
        Class<? extends Mapper<Derivation, NullWritable, ?, ?>> incMapper,
        boolean filter) throws IOException, AccumuloSecurityException {
    Path inputPath = MRReasoningUtils.getInputPath(job.getConfiguration());
    if (inputPath != null) {
        configureRdfInput(inputPath, rdfMapper);
    } else {
        configureAccumuloInput(tableMapper);
    }
    configureFileInput(fileMapper, incMapper, filter);
}
Example #2
Source File: JobConfiguration.java From secure-data-service with Apache License 2.0
@SuppressWarnings("rawtypes") public static Class<? extends Mapper> getMapClass(mapper m) { Class<? extends Mapper> rval = null; switch(m) { case IDMapper: rval = tmp.getClass(); break; case StringMapper: rval = org.slc.sli.aggregation.mapreduce.map.StringValueMapper.class; break; case LongMapper: rval = org.slc.sli.aggregation.mapreduce.map.LongValueMapper.class; break; case DoubleMapper: rval = org.slc.sli.aggregation.mapreduce.map.DoubleValueMapper.class; break; case EnumMapper: rval = org.slc.sli.aggregation.mapreduce.map.EnumValueMapper.class; break; } return rval; }
Example #3
Source File: TestCopyMapper.java From hadoop with Apache License 2.0
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
    try {
        for (Path path : pathList) {
            copyMapper.map(
                    new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    new CopyListingFileStatus(fs.getFileStatus(path)), context);
        }

        Assert.assertEquals(nFiles,
                context.getCounter(CopyMapper.Counter.SKIP).getValue());
    } catch (Exception exception) {
        Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(), false);
    }
}
Example #4
Source File: TestChainErrors.java From hadoop with Apache License 2.0
/**
 * Tests one of the mappers throwing exception.
 *
 * @throws Exception
 */
public void testChainFail() throws Exception {
    Configuration conf = createJobConf();

    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
    job.setJobName("chain");

    ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
            LongWritable.class, Text.class, null);
    ChainMapper.addMapper(job, FailMap.class, LongWritable.class, Text.class,
            IntWritable.class, Text.class, null);
    ChainMapper.addMapper(job, Mapper.class, IntWritable.class, Text.class,
            LongWritable.class, Text.class, null);

    job.waitForCompletion(true);
    assertTrue("Job Not failed", !job.isSuccessful());
}
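Outside of the test above, a driver normally chains the identity Mapper with its own map stages in the same way. The following is a minimal sketch, not taken from the Hadoop test; the UpperCaseMapper class and job name are illustrative only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;

public class ChainDriverSketch {

    /** Trivial second stage: upper-cases each line, keeping the original key. */
    public static class UpperCaseMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(value.toString().toUpperCase()));
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "chained-mappers");
        // Identity Mapper first, then the custom stage; each call declares that
        // stage's input and output key/value classes.
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, null);
        ChainMapper.addMapper(job, UpperCaseMapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, null);
        return job;
    }
}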
Example #5
Source File: JdbcExportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends Mapper> getMapperClass() {
    if (isHCatJob) {
        return SqoopHCatUtilities.getExportMapperClass();
    }
    if (options.getOdpsTable() != null) {
        return OdpsExportMapper.class;
    }
    switch (fileType) {
        case SEQUENCE_FILE:
            return SequenceFileExportMapper.class;
        case AVRO_DATA_FILE:
            return AvroExportMapper.class;
        case PARQUET_FILE:
            return ParquetExportMapper.class;
        case UNKNOWN:
        default:
            return TextExportMapper.class;
    }
}
Example #6
Source File: SchoolProficiencyMapper.java From secure-data-service with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override protected void setup(Mapper.Context context) throws IOException, InterruptedException { super.setup(context); ConfigSections cfg = JobConfiguration.fromHadoopConfiguration(context.getConfiguration()); MetadataConfig meta = cfg.getMetadata(); bands = meta.getCutPoints(); BSONObject obj = MongoConfigUtil.getFields(context.getConfiguration()); if (obj != null) { fields = obj.keySet().toArray(new String[0]); } else { throw new IllegalArgumentException("Invalid configuration found. Aggregates must " + "specify a the hadoop.map.fields property."); } }
Example #7
Source File: TestMainframeImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Test
public void testSuperMapperClass() throws SecurityException, NoSuchMethodException,
        IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    String jarFile = "dummyJarFile";
    String tableName = "dummyTableName";
    Path path = new Path("dummyPath");
    options.setFileLayout(SqoopOptions.FileLayout.AvroDataFile);
    ImportJobContext context = new ImportJobContext(tableName, jarFile, options, path);
    avroImportJob = new MainframeImportJob(options, context);

    // To access protected method by means of reflection
    Class[] types = {};
    Method m_getMapperClass =
        MainframeImportJob.class.getDeclaredMethod("getMapperClass", types);
    m_getMapperClass.setAccessible(true);
    Class<? extends Mapper> mapper =
        (Class<? extends Mapper>) m_getMapperClass.invoke(avroImportJob);
    assertEquals(mapper, org.apache.sqoop.mapreduce.AvroImportMapper.class);
}
Example #8
Source File: FlowPartitionMapper.java From xxhadoop with Apache License 2.0
@Override
protected void map(LongWritable key, Text value,
        Mapper<LongWritable, Text, Text, FlowBean>.Context context)
        throws IOException, InterruptedException {
    //super.map(key, value, context);
    line = value.toString();
    String[] fields = StringUtils.split(line, SEPARATOR);
    if (fields.length != 11) {
        LOGGER.error("invalid line: {}", line);
        System.err.println("invalid line: " + line);
    } else {
        phoneNum = fields[1];
        upFlow = Long.parseLong(fields[8]);
        downFlow = Long.parseLong(fields[9]);

        flowBean.setPhoneNum(phoneNum);
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        //sumFlow = upFlow + downFlow;
        flowBean.setSumFlow(upFlow + downFlow);

        text.set(phoneNum);
        context.write(text, flowBean);
    }
}
Example #9
Source File: IngestMapper.java From geowave with Apache License 2.0
@Override
protected void setup(final org.apache.hadoop.mapreduce.Mapper.Context context)
        throws IOException, InterruptedException {
    super.setup(context);
    try {
        final String ingestWithMapperStr =
            context.getConfiguration().get(AbstractMapReduceIngest.INGEST_PLUGIN_KEY);
        final byte[] ingestWithMapperBytes =
            ByteArrayUtils.byteArrayFromString(ingestWithMapperStr);
        ingestWithMapper = (IngestWithMapper) PersistenceUtils.fromBinary(ingestWithMapperBytes);
        globalVisibility =
            context.getConfiguration().get(AbstractMapReduceIngest.GLOBAL_VISIBILITY_KEY);
        indexNames = AbstractMapReduceIngest.getIndexNames(context.getConfiguration());
    } catch (final Exception e) {
        throw new IllegalArgumentException(e);
    }
}
Example #10
Source File: NNMapReduce.java From geowave with Apache License 2.0
@SuppressWarnings("unchecked") @Override protected void setup( final Mapper<GeoWaveInputKey, Object, PartitionDataWritable, AdapterWithObjectWritable>.Context context) throws IOException, InterruptedException { super.setup(context); final ScopedJobConfiguration config = new ScopedJobConfiguration(context.getConfiguration(), NNMapReduce.class, LOGGER); serializationTool = new HadoopWritableSerializationTool(context); try { partitioner = config.getInstance( PartitionParameters.Partition.PARTITIONER_CLASS, Partitioner.class, OrthodromicDistancePartitioner.class); partitioner.initialize(context, NNMapReduce.class); } catch (final Exception e1) { throw new IOException(e1); } }
Example #11
Source File: VectorExportMapper.java From geowave with Apache License 2.0
@Override
protected void map(
        final GeoWaveInputKey key,
        final SimpleFeature value,
        final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
        throws IOException, InterruptedException {
    AvroSFCWriter avroWriter = adapterIdToAvroWriterMap.get(key.getInternalAdapterId());
    if (avroWriter == null) {
        avroWriter = new AvroSFCWriter(value.getFeatureType(), batchSize);
        adapterIdToAvroWriterMap.put(key.getInternalAdapterId(), avroWriter);
    }
    final AvroSimpleFeatureCollection retVal = avroWriter.write(value);
    if (retVal != null) {
        outKey.datum(retVal);
        context.write(outKey, outVal);
    }
}
Example #12
Source File: BWAAlnInstance.java From halvade with GNU General Public License v3.0
static public BWAAlnInstance getBWAInstance(Mapper.Context context, String bin)
        throws IOException, InterruptedException, URISyntaxException {
    if (instance == null) {
        instance = new BWAAlnInstance(context, bin);
        instance.startAligner(context);
    }
    BWAAlnInstance.context = context;
    Logger.DEBUG("Started BWA");
    return instance;
}
Example #13
Source File: TestCopyMapper.java From big-c with Apache License 2.0
@Test(timeout=40000)
public void testMakeDirFailure() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
                stubContext.getContext();

        Configuration configuration = context.getConfiguration();
        String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
                .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
        configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        copyMapper.setup(context);

        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
                        pathList.get(0))),
                new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);

        Assert.assertTrue("There should have been an exception.", false);
    } catch (Exception ignore) {
    }
}
Example #14
Source File: MapReduceBitcoinTransactionTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void map(@Mocked final Mapper.Context defaultContext)
        throws IOException, InterruptedException {
    BitcoinTransactionMap mapper = new BitcoinTransactionMap();
    final BytesWritable key = new BytesWritable();
    final BitcoinTransaction value = new BitcoinTransaction(0, new byte[0],
            new ArrayList<BitcoinTransactionInput>(), new byte[0],
            new ArrayList<BitcoinTransactionOutput>(), 0);
    final Text defaultKey = new Text("Transaction Input Count:");
    final IntWritable nullInt = new IntWritable(0);
    new Expectations() {{
        defaultContext.write(defaultKey, nullInt);
        times = 1;
    }};
    mapper.map(key, value, defaultContext);
}
Example #15
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
        final Class<? extends Mapper> mapperClass,
        final Class<? extends InputFormat> inputFormatClass,
        final Class<? extends OutputFormat> outputFormatClass,
        final ImportJobContext context) {
    super(opts, mapperClass, inputFormatClass, outputFormatClass);
    this.context = context;
}
Example #16
Source File: AbstractReasoningTool.java From rya with Apache License 2.0
/**
 * Set up the MapReduce job to use an RDF file as an input.
 * @param rdfMapper class to use
 */
protected void configureRdfInput(Path inputPath,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) {
    Configuration conf = job.getConfiguration();
    String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName());
    conf.set(MRUtils.FORMAT_PROP, format);
    MultipleInputs.addInputPath(job, inputPath, RdfFileInputFormat.class, rdfMapper);
}
Example #17
Source File: DelegatingMapper.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
Example #18
Source File: MergeMapper.java From MapReduce-Demo with MIT License
@Override
protected void setup(Mapper<NullWritable, BytesWritable, Text, BytesWritable>.Context context)
        throws IOException, InterruptedException {
    InputSplit split = context.getInputSplit();
    Path path = ((FileSplit) split).getPath(); // path of the file this split belongs to
    fileNameKey = new Text(path.toString());
}
Example #19
Source File: MainframeImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends Mapper> getMapperClass() {
    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        return MainframeDatasetImportMapper.class;
    } else {
        return super.getMapperClass();
    }
}
Example #20
Source File: TestHFileOutputFormat2.java From hbase with Apache License 2.0
@Override
protected void map(NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Put>.Context context)
        throws java.io.IOException, InterruptedException {

    byte keyBytes[] = new byte[keyLength];
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
    Random random = new Random();
    byte[] key;
    for (int j = 0; j < tables.length; ++j) {
        for (int i = 0; i < ROWSPERSPLIT; i++) {
            random.nextBytes(keyBytes);
            // Ensure that unique tasks generate unique keys
            keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
            random.nextBytes(valBytes);
            key = keyBytes;
            if (multiTableMapper) {
                key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
            }

            for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
                Put p = new Put(keyBytes);
                p.addColumn(family, QUALIFIER, valBytes);
                // set TTL to very low so that the scan does not return any value
                p.setTTL(1l);
                context.write(new ImmutableBytesWritable(key), p);
            }
        }
    }
}
Example #21
Source File: InMemCuboidFromBaseCuboidMapper.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Mapper.Context context) throws IOException {
    super.doSetup(context);

    long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    GTInfo gtInfo = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, baseCuboid),
            new CubeDimEncMap(cubeDesc, dictionaryMap));
    keyValueBuffer = ByteBuffer.allocate(gtInfo.getMaxRecordLength());
    keyOffset = cubeSegment.getRowKeyPreambleSize();
}
Example #22
Source File: MultipleInputs.java From big-c with Apache License 2.0
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path,
        Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass) {

    addInputPath(job, path, inputFormatClass);
    Configuration conf = job.getConfiguration();
    String mapperMapping = path.toString() + ";" + mapperClass.getName();
    String mappers = conf.get(DIR_MAPPERS);
    conf.set(DIR_MAPPERS, mappers == null ? mapperMapping
            : mappers + "," + mapperMapping);

    job.setMapperClass(DelegatingMapper.class);
}
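A driver calls this method once per input directory, and the DelegatingMapper shown in Example #17 then picks the right mapper for each split at runtime. The sketch below is illustrative only: the paths, job name, and placeholder mapper classes are assumptions, not code from the project above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultipleInputsSketch {

    /** Placeholder mapper for plain-text records; emits each line as a key. */
    public static class TextRecordMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    /** Placeholder mapper for SequenceFile records keyed by Text; emits the key. */
    public static class SequenceRecordMapper extends Mapper<Text, Text, Text, NullWritable> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "multiple-inputs");
        // Each input directory gets its own InputFormat and Mapper.
        MultipleInputs.addInputPath(job, new Path("/data/text"),
                TextInputFormat.class, TextRecordMapper.class);
        MultipleInputs.addInputPath(job, new Path("/data/seq"),
                SequenceFileInputFormat.class, SequenceRecordMapper.class);
        return job;
    }
}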
Example #23
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
        final Class<? extends Mapper> mapperClass,
        final Class<? extends InputFormat> inputFormatClass,
        final Class<? extends OutputFormat> outputFormatClass,
        final ImportJobContext context) {
    super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}
Example #24
Source File: MultithreadedTableMapper.java From hbase with Apache License 2.0
/**
 * Set the application's mapper class.
 * @param <K2> the map output key type
 * @param <V2> the map output value type
 * @param job the job to modify
 * @param cls the class to use as the mapper
 */
public static <K2, V2> void setMapperClass(Job job,
        Class<? extends Mapper<ImmutableBytesWritable, Result, K2, V2>> cls) {
    if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
        throw new IllegalArgumentException("Can't have recursive "
                + "MultithreadedTableMapper instances.");
    }
    job.getConfiguration().setClass(MAPPER_CLASS, cls, Mapper.class);
}
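The usual pattern is to register MultithreadedTableMapper as the job's mapper and then name the application mapper it should run on its worker threads. The following is a sketch under stated assumptions: the table name, thread count, and RowKeyMapper class are hypothetical, not part of the HBase source above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultithreadedTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MultithreadedScanSketch {

    /** Placeholder per-row mapper: emits the row key as text. */
    public static class RowKeyMapper extends TableMapper<Text, NullWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result result, Context context)
                throws IOException, InterruptedException {
            context.write(new Text(row.get()), NullWritable.get());
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "multithreaded-scan");
        // Register the wrapper as the job's mapper, then tell it which
        // application mapper to run on its worker threads.
        TableMapReduceUtil.initTableMapperJob("my_table", new Scan(),
                MultithreadedTableMapper.class, Text.class, NullWritable.class, job);
        MultithreadedTableMapper.setMapperClass(job, RowKeyMapper.class);
        MultithreadedTableMapper.setNumberOfThreads(job, 8);
        return job;
    }
}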
Example #25
Source File: InjectableConnManager.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Allow the user to inject custom mapper, input, and output formats
 * into the importTable() process.
 */
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
        throws IOException, ImportException {
    SqoopOptions options = context.getOptions();
    Configuration conf = options.getConf();

    Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
        conf.getClass(MAPPER_KEY, Mapper.class);
    Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
        conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
    Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
        conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);

    Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
        conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);

    String tableName = context.getTableName();

    // Instantiate the user's chosen ImportJobBase instance.
    ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);

    // And configure the dependencies to inject
    importJob.setOptions(options);
    importJob.setMapperClass(mapperClass);
    importJob.setInputFormatClass(ifClass);
    importJob.setOutputFormatClass(ofClass);

    importJob.runImport(tableName, context.getJarFile(),
        getSplitColumn(options, tableName), conf);
}
Example #26
Source File: UpdateCentroidCostMapReduce.java From geowave with Apache License 2.0
@Override
protected void mapNativeValue(
        final GeoWaveInputKey key,
        final Object value,
        final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, CountofDoubleWritable>.Context context)
        throws IOException, InterruptedException {
    final AnalyticItemWrapper<Object> wrappedItem = itemWrapperFactory.create(value);
    dw.set(
        nestedGroupCentroidAssigner.findCentroidForLevel(wrappedItem, centroidAssociationFn),
        1.0);
    context.write(outputWritable, dw);
}
Example #27
Source File: JobContextImpl.java From big-c with Apache License 2.0
/**
 * Get the {@link Mapper} class for the job.
 *
 * @return the {@link Mapper} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Mapper<?,?,?,?>> getMapperClass()
        throws ClassNotFoundException {
    return (Class<? extends Mapper<?,?,?,?>>)
        conf.getClass(MAP_CLASS_ATTR, Mapper.class);
}
Example #28
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
        Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
            + ", numNodes=" + numNodes);
    Job job = Job.getInstance(getConf());

    job.setJobName("Random Input Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    job.setInputFormatClass(GeneratorInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

    job.setMapperClass(Mapper.class); // identity mapper

    FileOutputFormat.setOutputPath(job, tmpOutput);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
Example #29
Source File: TestLineRecordReaderJobs.java From hadoop with Apache License 2.0
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
        InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}
Example #30
Source File: Cushaw2Instance.java From halvade with GNU General Public License v3.0
static public Cushaw2Instance getCushaw2Instance(Mapper.Context context, String bin)
        throws IOException, InterruptedException, URISyntaxException {
    if (instance == null) {
        instance = new Cushaw2Instance(context, bin);
        instance.startAligner(context);
    }
    Cushaw2Instance.context = context;
    Logger.DEBUG("Started Cushaw2");
    return instance;
}