org.apache.hadoop.mapreduce.Mapper Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.Mapper.
The original project and source file are noted above each example.
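For orientation, here is a minimal, self-contained Mapper subclass in the usual word-count style. It is a sketch for reference only; the class and field names are illustrative and do not come from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/** Emits (token, 1) for every whitespace-separated token in each input line. */
public class TokenCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}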
Example #1
Source File: AbstractReasoningTool.java From rya with Apache License 2.0
/**
 * Set up the MapReduce job to use as inputs both an Accumulo table and the
 * files containing previously derived information. Looks for a file for
 * every iteration number so far, preferring final cleaned up output from
 * that iteration but falling back on intermediate data if necessary.
 * @param tableMapper Mapper class to use for database input
 * @param rdfMapper Mapper class to use for direct RDF input
 * @param fileMapper Mapper class to use for derived triples input
 * @param incMapper Mapper class to use for derived inconsistencies input
 * @param filter True to exclude previously derived data that couldn't be
 *        used to derive anything new at this point.
 */
protected void configureMultipleInput(
        Class<? extends Mapper<Key, Value, ?, ?>> tableMapper,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper,
        Class<? extends Mapper<Fact, NullWritable, ?, ?>> fileMapper,
        Class<? extends Mapper<Derivation, NullWritable, ?, ?>> incMapper,
        boolean filter) throws IOException, AccumuloSecurityException {
    Path inputPath = MRReasoningUtils.getInputPath(job.getConfiguration());
    if (inputPath != null) {
        configureRdfInput(inputPath, rdfMapper);
    } else {
        configureAccumuloInput(tableMapper);
    }
    configureFileInput(fileMapper, incMapper, filter);
}
Example #2
Source File: JobConfiguration.java From secure-data-service with Apache License 2.0
@SuppressWarnings("rawtypes") public static Class<? extends Mapper> getMapClass(mapper m) { Class<? extends Mapper> rval = null; switch(m) { case IDMapper: rval = tmp.getClass(); break; case StringMapper: rval = org.slc.sli.aggregation.mapreduce.map.StringValueMapper.class; break; case LongMapper: rval = org.slc.sli.aggregation.mapreduce.map.LongValueMapper.class; break; case DoubleMapper: rval = org.slc.sli.aggregation.mapreduce.map.DoubleValueMapper.class; break; case EnumMapper: rval = org.slc.sli.aggregation.mapreduce.map.EnumValueMapper.class; break; } return rval; }
Example #3
Source File: TestCopyMapper.java From hadoop with Apache License 2.0
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
    try {
        for (Path path : pathList) {
            copyMapper.map(
                    new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    new CopyListingFileStatus(fs.getFileStatus(path)), context);
        }

        Assert.assertEquals(nFiles,
                context.getCounter(CopyMapper.Counter.SKIP).getValue());
    } catch (Exception exception) {
        Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(), false);
    }
}
Example #4
Source File: TestChainErrors.java From hadoop with Apache License 2.0
/**
 * Tests one of the mappers throwing exception.
 *
 * @throws Exception
 */
public void testChainFail() throws Exception {
    Configuration conf = createJobConf();

    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
    job.setJobName("chain");

    ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
            LongWritable.class, Text.class, null);
    ChainMapper.addMapper(job, FailMap.class, LongWritable.class, Text.class,
            IntWritable.class, Text.class, null);
    ChainMapper.addMapper(job, Mapper.class, IntWritable.class, Text.class,
            LongWritable.class, Text.class, null);

    job.waitForCompletion(true);
    assertTrue("Job Not failed", !job.isSuccessful());
}
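Outside of the test above, a driver normally chains the identity Mapper with its own map stages in the same way. The following is a minimal sketch, not taken from the Hadoop test; the UpperCaseMapper class and job name are illustrative only.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;

public class ChainDriverSketch {

    /** Trivial second stage: upper-cases each line, keeping the original key. */
    public static class UpperCaseMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(value.toString().toUpperCase()));
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "chained-mappers");
        // Identity Mapper first, then the custom stage; each call declares that
        // stage's input and output key/value classes.
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, null);
        ChainMapper.addMapper(job, UpperCaseMapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, null);
        return job;
    }
}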
Example #5
Source File: JdbcExportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends Mapper> getMapperClass() {
    if (isHCatJob) {
        return SqoopHCatUtilities.getExportMapperClass();
    }
    if (options.getOdpsTable() != null) {
        return OdpsExportMapper.class;
    }
    switch (fileType) {
        case SEQUENCE_FILE:
            return SequenceFileExportMapper.class;
        case AVRO_DATA_FILE:
            return AvroExportMapper.class;
        case PARQUET_FILE:
            return ParquetExportMapper.class;
        case UNKNOWN:
        default:
            return TextExportMapper.class;
    }
}
Example #6
Source File: SchoolProficiencyMapper.java From secure-data-service with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override protected void setup(Mapper.Context context) throws IOException, InterruptedException { super.setup(context); ConfigSections cfg = JobConfiguration.fromHadoopConfiguration(context.getConfiguration()); MetadataConfig meta = cfg.getMetadata(); bands = meta.getCutPoints(); BSONObject obj = MongoConfigUtil.getFields(context.getConfiguration()); if (obj != null) { fields = obj.keySet().toArray(new String[0]); } else { throw new IllegalArgumentException("Invalid configuration found. Aggregates must " + "specify a the hadoop.map.fields property."); } }
Example #7
Source File: TestMainframeImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Test
public void testSuperMapperClass() throws SecurityException, NoSuchMethodException,
        IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    String jarFile = "dummyJarFile";
    String tableName = "dummyTableName";
    Path path = new Path("dummyPath");
    options.setFileLayout(SqoopOptions.FileLayout.AvroDataFile);
    ImportJobContext context = new ImportJobContext(tableName, jarFile, options, path);
    avroImportJob = new MainframeImportJob(options, context);

    // To access protected method by means of reflection
    Class[] types = {};
    Method m_getMapperClass =
        MainframeImportJob.class.getDeclaredMethod("getMapperClass", types);
    m_getMapperClass.setAccessible(true);
    Class<? extends Mapper> mapper =
        (Class<? extends Mapper>) m_getMapperClass.invoke(avroImportJob);
    assertEquals(mapper, org.apache.sqoop.mapreduce.AvroImportMapper.class);
}
Example #8
Source File: FlowPartitionMapper.java From xxhadoop with Apache License 2.0
@Override
protected void map(LongWritable key, Text value,
        Mapper<LongWritable, Text, Text, FlowBean>.Context context)
        throws IOException, InterruptedException {
    //super.map(key, value, context);
    line = value.toString();
    String[] fields = StringUtils.split(line, SEPARATOR);
    if (fields.length != 11) {
        LOGGER.error("invalid line: {}", line);
        System.err.println("invalid line: " + line);
    } else {
        phoneNum = fields[1];
        upFlow = Long.parseLong(fields[8]);
        downFlow = Long.parseLong(fields[9]);

        flowBean.setPhoneNum(phoneNum);
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        //sumFlow = upFlow + downFlow;
        flowBean.setSumFlow(upFlow + downFlow);

        text.set(phoneNum);
        context.write(text, flowBean);
    }
}
Example #9
Source File: IngestMapper.java From geowave with Apache License 2.0
@Override
protected void setup(final org.apache.hadoop.mapreduce.Mapper.Context context)
        throws IOException, InterruptedException {
    super.setup(context);
    try {
        final String ingestWithMapperStr =
            context.getConfiguration().get(AbstractMapReduceIngest.INGEST_PLUGIN_KEY);
        final byte[] ingestWithMapperBytes =
            ByteArrayUtils.byteArrayFromString(ingestWithMapperStr);
        ingestWithMapper = (IngestWithMapper) PersistenceUtils.fromBinary(ingestWithMapperBytes);
        globalVisibility =
            context.getConfiguration().get(AbstractMapReduceIngest.GLOBAL_VISIBILITY_KEY);
        indexNames = AbstractMapReduceIngest.getIndexNames(context.getConfiguration());
    } catch (final Exception e) {
        throw new IllegalArgumentException(e);
    }
}
Example #10
Source File: NNMapReduce.java From geowave with Apache License 2.0
@SuppressWarnings("unchecked") @Override protected void setup( final Mapper<GeoWaveInputKey, Object, PartitionDataWritable, AdapterWithObjectWritable>.Context context) throws IOException, InterruptedException { super.setup(context); final ScopedJobConfiguration config = new ScopedJobConfiguration(context.getConfiguration(), NNMapReduce.class, LOGGER); serializationTool = new HadoopWritableSerializationTool(context); try { partitioner = config.getInstance( PartitionParameters.Partition.PARTITIONER_CLASS, Partitioner.class, OrthodromicDistancePartitioner.class); partitioner.initialize(context, NNMapReduce.class); } catch (final Exception e1) { throw new IOException(e1); } }
Example #11
Source File: VectorExportMapper.java From geowave with Apache License 2.0
@Override
protected void map(
        final GeoWaveInputKey key,
        final SimpleFeature value,
        final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
        throws IOException, InterruptedException {
    AvroSFCWriter avroWriter = adapterIdToAvroWriterMap.get(key.getInternalAdapterId());
    if (avroWriter == null) {
        avroWriter = new AvroSFCWriter(value.getFeatureType(), batchSize);
        adapterIdToAvroWriterMap.put(key.getInternalAdapterId(), avroWriter);
    }
    final AvroSimpleFeatureCollection retVal = avroWriter.write(value);
    if (retVal != null) {
        outKey.datum(retVal);
        context.write(outKey, outVal);
    }
}
Example #12
Source File: BWAAlnInstance.java From halvade with GNU General Public License v3.0
static public BWAAlnInstance getBWAInstance(Mapper.Context context, String bin)
        throws IOException, InterruptedException, URISyntaxException {
    if (instance == null) {
        instance = new BWAAlnInstance(context, bin);
        instance.startAligner(context);
    }
    BWAAlnInstance.context = context;
    Logger.DEBUG("Started BWA");
    return instance;
}
Example #13
Source File: TestCopyMapper.java From big-c with Apache License 2.0
@Test(timeout=40000)
public void testMakeDirFailure() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StubContext stubContext = new StubContext(getConfiguration(), null, 0);
        Mapper<Text, CopyListingFileStatus, Text, Text>.Context context =
                stubContext.getContext();

        Configuration configuration = context.getConfiguration();
        String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
                .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
        configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        copyMapper.setup(context);

        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
                        pathList.get(0))),
                new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);

        Assert.assertTrue("There should have been an exception.", false);
    } catch (Exception ignore) {
    }
}
Example #14
Source File: MapReduceBitcoinTransactionTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void map(@Mocked final Mapper.Context defaultContext)
        throws IOException, InterruptedException {
    BitcoinTransactionMap mapper = new BitcoinTransactionMap();
    final BytesWritable key = new BytesWritable();
    final BitcoinTransaction value = new BitcoinTransaction(0, new byte[0],
            new ArrayList<BitcoinTransactionInput>(), new byte[0],
            new ArrayList<BitcoinTransactionOutput>(), 0);
    final Text defaultKey = new Text("Transaction Input Count:");
    final IntWritable nullInt = new IntWritable(0);
    new Expectations() {{
        defaultContext.write(defaultKey, nullInt);
        times = 1;
    }};
    mapper.map(key, value, defaultContext);
}
Example #15
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
        final Class<? extends Mapper> mapperClass,
        final Class<? extends InputFormat> inputFormatClass,
        final Class<? extends OutputFormat> outputFormatClass,
        final ImportJobContext context) {
    super(opts, mapperClass, inputFormatClass, outputFormatClass);
    this.context = context;
}
Example #16
Source File: AbstractReasoningTool.java From rya with Apache License 2.0
/**
 * Set up the MapReduce job to use an RDF file as an input.
 * @param rdfMapper class to use
 */
protected void configureRdfInput(Path inputPath,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) {
    Configuration conf = job.getConfiguration();
    String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName());
    conf.set(MRUtils.FORMAT_PROP, format);
    MultipleInputs.addInputPath(job, inputPath, RdfFileInputFormat.class, rdfMapper);
}
Example #17
Source File: DelegatingMapper.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
Example #18
Source File: MergeMapper.java From MapReduce-Demo with MIT License
@Override
protected void setup(Mapper<NullWritable, BytesWritable, Text, BytesWritable>.Context context)
        throws IOException, InterruptedException {
    InputSplit split = context.getInputSplit();
    Path path = ((FileSplit) split).getPath(); // path of the file this split belongs to
    fileNameKey = new Text(path.toString());
}
Example #19
Source File: MainframeImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected Class<? extends Mapper> getMapperClass() {
    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        return MainframeDatasetImportMapper.class;
    } else {
        return super.getMapperClass();
    }
}
Example #20
Source File: TestHFileOutputFormat2.java From hbase with Apache License 2.0
@Override
protected void map(NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Put>.Context context)
        throws java.io.IOException, InterruptedException {

    byte keyBytes[] = new byte[keyLength];
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
    Random random = new Random();
    byte[] key;
    for (int j = 0; j < tables.length; ++j) {
        for (int i = 0; i < ROWSPERSPLIT; i++) {
            random.nextBytes(keyBytes);
            // Ensure that unique tasks generate unique keys
            keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
            random.nextBytes(valBytes);
            key = keyBytes;
            if (multiTableMapper) {
                key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
            }

            for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
                Put p = new Put(keyBytes);
                p.addColumn(family, QUALIFIER, valBytes);
                // set TTL to very low so that the scan does not return any value
                p.setTTL(1l);
                context.write(new ImmutableBytesWritable(key), p);
            }
        }
    }
}
Example #21
Source File: InMemCuboidFromBaseCuboidMapper.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Mapper.Context context) throws IOException {
    super.doSetup(context);

    long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    GTInfo gtInfo = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, baseCuboid),
            new CubeDimEncMap(cubeDesc, dictionaryMap));
    keyValueBuffer = ByteBuffer.allocate(gtInfo.getMaxRecordLength());
    keyOffset = cubeSegment.getRowKeyPreambleSize();
}
Example #22
Source File: MultipleInputs.java From big-c with Apache License 2.0
/**
 * Add a {@link Path} with a custom {@link InputFormat} and
 * {@link Mapper} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 * @param mapperClass {@link Mapper} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path,
        Class<? extends InputFormat> inputFormatClass,
        Class<? extends Mapper> mapperClass) {

    addInputPath(job, path, inputFormatClass);
    Configuration conf = job.getConfiguration();
    String mapperMapping = path.toString() + ";" + mapperClass.getName();
    String mappers = conf.get(DIR_MAPPERS);
    conf.set(DIR_MAPPERS, mappers == null ? mapperMapping
            : mappers + "," + mapperMapping);

    job.setMapperClass(DelegatingMapper.class);
}
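A driver calls this method once per input directory, and the DelegatingMapper shown in Example #17 then picks the right mapper for each split at runtime. The sketch below is illustrative only: the paths, job name, and placeholder mapper classes are assumptions, not code from the project above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultipleInputsSketch {

    /** Placeholder mapper for plain-text records; emits each line as a key. */
    public static class TextRecordMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    /** Placeholder mapper for SequenceFile records keyed by Text; emits the key. */
    public static class SequenceRecordMapper extends Mapper<Text, Text, Text, NullWritable> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "multiple-inputs");
        // Each input directory gets its own InputFormat and Mapper.
        MultipleInputs.addInputPath(job, new Path("/data/text"),
                TextInputFormat.class, TextRecordMapper.class);
        MultipleInputs.addInputPath(job, new Path("/data/seq"),
                SequenceFileInputFormat.class, SequenceRecordMapper.class);
        return job;
    }
}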
Example #23
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public ImportJobBase(final SqoopOptions opts,
        final Class<? extends Mapper> mapperClass,
        final Class<? extends InputFormat> inputFormatClass,
        final Class<? extends OutputFormat> outputFormatClass,
        final ImportJobContext context) {
    super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}
Example #24
Source File: MultithreadedTableMapper.java From hbase with Apache License 2.0
/**
 * Set the application's mapper class.
 * @param <K2> the map output key type
 * @param <V2> the map output value type
 * @param job the job to modify
 * @param cls the class to use as the mapper
 */
public static <K2, V2> void setMapperClass(Job job,
        Class<? extends Mapper<ImmutableBytesWritable, Result, K2, V2>> cls) {
    if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
        throw new IllegalArgumentException("Can't have recursive "
                + "MultithreadedTableMapper instances.");
    }
    job.getConfiguration().setClass(MAPPER_CLASS, cls, Mapper.class);
}
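The usual pattern is to register MultithreadedTableMapper as the job's mapper and then name the application mapper it should run on its worker threads. The following is a sketch under stated assumptions: the table name, thread count, and RowKeyMapper class are hypothetical, not part of the HBase source above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultithreadedTableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MultithreadedScanSketch {

    /** Placeholder per-row mapper: emits the row key as text. */
    public static class RowKeyMapper extends TableMapper<Text, NullWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result result, Context context)
                throws IOException, InterruptedException {
            context.write(new Text(row.get()), NullWritable.get());
        }
    }

    public static Job buildJob(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf, "multithreaded-scan");
        // Register the wrapper as the job's mapper, then tell it which
        // application mapper to run on its worker threads.
        TableMapReduceUtil.initTableMapperJob("my_table", new Scan(),
                MultithreadedTableMapper.class, Text.class, NullWritable.class, job);
        MultithreadedTableMapper.setMapperClass(job, RowKeyMapper.class);
        MultithreadedTableMapper.setNumberOfThreads(job, 8);
        return job;
    }
}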
Example #25
Source File: InjectableConnManager.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Allow the user to inject custom mapper, input, and output formats
 * into the importTable() process.
 */
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
        throws IOException, ImportException {
    SqoopOptions options = context.getOptions();
    Configuration conf = options.getConf();

    Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
        conf.getClass(MAPPER_KEY, Mapper.class);
    Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
        conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
    Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
        conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);

    Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
        conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);

    String tableName = context.getTableName();

    // Instantiate the user's chosen ImportJobBase instance.
    ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);

    // And configure the dependencies to inject
    importJob.setOptions(options);
    importJob.setMapperClass(mapperClass);
    importJob.setInputFormatClass(ifClass);
    importJob.setOutputFormatClass(ofClass);

    importJob.runImport(tableName, context.getJarFile(),
        getSplitColumn(options, tableName), conf);
}
Example #26
Source File: UpdateCentroidCostMapReduce.java From geowave with Apache License 2.0
@Override
protected void mapNativeValue(
        final GeoWaveInputKey key,
        final Object value,
        final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, CountofDoubleWritable>.Context context)
        throws IOException, InterruptedException {
    final AnalyticItemWrapper<Object> wrappedItem = itemWrapperFactory.create(value);
    dw.set(
        nestedGroupCentroidAssigner.findCentroidForLevel(wrappedItem, centroidAssociationFn),
        1.0);
    context.write(outputWritable, dw);
}
Example #27
Source File: JobContextImpl.java From big-c with Apache License 2.0
/**
 * Get the {@link Mapper} class for the job.
 *
 * @return the {@link Mapper} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Mapper<?,?,?,?>> getMapperClass()
        throws ClassNotFoundException {
    return (Class<? extends Mapper<?,?,?,?>>)
        conf.getClass(MAP_CLASS_ATTR, Mapper.class);
}
Example #28
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
        Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
            + ", numNodes=" + numNodes);
    Job job = Job.getInstance(getConf());

    job.setJobName("Random Input Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    job.setInputFormatClass(GeneratorInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

    job.setMapperClass(Mapper.class); // identity mapper

    FileOutputFormat.setOutputPath(job, tmpOutput);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
Example #29
Source File: TestLineRecordReaderJobs.java From hadoop with Apache License 2.0
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
        InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}
Example #30
Source File: Cushaw2Instance.java From halvade with GNU General Public License v3.0
static public Cushaw2Instance getCushaw2Instance(Mapper.Context context, String bin)
        throws IOException, InterruptedException, URISyntaxException {
    if (instance == null) {
        instance = new Cushaw2Instance(context, bin);
        instance.startAligner(context);
    }
    Cushaw2Instance.context = context;
    Logger.DEBUG("Started Cushaw2");
    return instance;
}