org.apache.crunch.MapFn Java Examples
The following examples show how to use org.apache.crunch.MapFn.
Each example is taken from an open source project; the source file, project, and license are noted above it.
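Before the project examples, a quick orientation: MapFn<S, T> is Crunch's one-to-one specialization of DoFn, and a subclass implements a single map method that turns each input element into exactly one output element. A minimal sketch (StringLengthFn is a hypothetical class for illustration, not taken from the projects below):

import org.apache.crunch.MapFn;

// Emits exactly one output element per input element.
public class StringLengthFn extends MapFn<String, Integer> {
  @Override
  public Integer map(String input) {
    return input.length();
  }
}

Such a function would typically be applied with PCollection#parallelDo, supplying a PType for the output, for example lines.parallelDo(new StringLengthFn(), Writables.ints()).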
Example #1
Source File: CassandraParams.java from hdfs2cass (Apache License 2.0)
private static MapFn<CQLRecord, ByteBuffer> makeKeyFn(final int[] partitionKeyIndexes) {
  return new MapFn<CQLRecord, ByteBuffer>() {
    @Override
    public ByteBuffer map(final CQLRecord record) {
      return CassandraRecordUtils.getPartitionKey(record.getValues(), partitionKeyIndexes);
    }
  };
}
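A hedged usage sketch for this factory (the records collection, the single-column key at index 0, and the choice of Writables.bytes() as the output PType are assumptions for illustration, not code from hdfs2cass):

// Given an existing PCollection<CQLRecord> named records, and assuming
// the partition key is the first column:
PCollection<ByteBuffer> keys =
    records.parallelDo(makeKeyFn(new int[] { 0 }), Writables.bytes());

Since makeKeyFn is private and static, a call like this would have to live inside the class that defines it, as Example #2 shows.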
Example #2
Source File: CassandraParams.java from hdfs2cass (Apache License 2.0)
/**
 * @return a map function to extract the partition key from a record
 */
public MapFn<CQLRecord, ByteBuffer> getKeyFn() {
  return makeKeyFn(clusterInfo.getPartitionKeyIndexes());
}
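A note on the design: Crunch serializes every DoFn and ships it to the cluster tasks, so getKeyFn() delegates to the static makeKeyFn rather than constructing the anonymous class inline. Because a static method has no enclosing instance, the returned MapFn captures only the small final int[] of indexes instead of pulling the whole CassandraParams object into the serialized closure.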
Example #3
Source File: TransformTask.java from kite (Apache License 2.0)
public PipelineResult run() throws IOException {
  boolean isLocal = (isLocal(from.getDataset()) || isLocal(to.getDataset()));
  if (isLocal) {
    // copy to avoid making changes to the caller's configuration
    Configuration conf = new Configuration(getConf());
    conf.set("mapreduce.framework.name", "local");
    setConf(conf);
  }

  if (isHive(from) || isHive(to)) {
    setConf(addHiveDelegationToken(getConf()));

    // add jars needed for metastore interaction to the classpath
    if (!isLocal) {
      Class<?> fb303Class, thriftClass;
      try {
        // attempt to use libfb303 and libthrift 0.9.2 when async was added
        fb303Class = Class.forName(
            "com.facebook.fb303.FacebookService.AsyncProcessor");
        thriftClass = Class.forName(
            "org.apache.thrift.TBaseAsyncProcessor");
      } catch (ClassNotFoundException e) {
        try {
          // fallback to 0.9.0 or earlier
          fb303Class = Class.forName(
              "com.facebook.fb303.FacebookBase");
          thriftClass = Class.forName(
              "org.apache.thrift.TBase");
        } catch (ClassNotFoundException real) {
          throw new DatasetOperationException(
              "Cannot find thrift dependencies", real);
        }
      }

      TaskUtil.configure(getConf())
          .addJarForClass(Encoder.class)               // commons-codec
          .addJarForClass(Log.class)                   // commons-logging
          .addJarForClass(CompressorInputStream.class) // commons-compress
          .addJarForClass(ApiAdapter.class)            // datanucleus-core
          .addJarForClass(JDOAdapter.class)            // datanucleus-api-jdo
          .addJarForClass(SQLQuery.class)              // datanucleus-rdbms
          .addJarForClass(JDOHelper.class)             // jdo-api
          .addJarForClass(Transaction.class)           // jta
          .addJarForClass(fb303Class)                  // libfb303
          .addJarForClass(thriftClass)                 // libthrift
          .addJarForClass(HiveMetaStore.class)         // hive-metastore
          .addJarForClass(HiveConf.class);             // hive-exec
    }
  }

  PType<T> toPType = ptype(to);
  MapFn<T, T> validate = new CheckEntityClass<T>(to.getType());

  Pipeline pipeline = new MRPipeline(getClass(), getConf());

  PCollection<T> collection = pipeline.read(CrunchDatasets.asSource(from))
      .parallelDo(transform, toPType)
      .parallelDo(validate, toPType);

  if (compact) {
    // the transform must be run before partitioning
    collection = CrunchDatasets.partition(collection, to, numWriters, numPartitionWriters);
  }

  pipeline.write(collection, CrunchDatasets.asTarget(to), mode);

  PipelineResult result = pipeline.done();

  StageResult sr = Iterables.getFirst(result.getStageResults(), null);
  if (sr != null && MAP_INPUT_RECORDS != null) {
    this.count = sr.getCounterValue(MAP_INPUT_RECORDS);
  }

  return result;
}
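The validate step above is itself a MapFn: CheckEntityClass passes each record through unchanged after checking that it has the expected entity class. Its body is not shown on this page, so the following is only a minimal sketch of such an identity-style validating MapFn; the actual kite implementation may differ:

import org.apache.crunch.MapFn;

// Sketch only; the real kite CheckEntityClass is not reproduced here.
class CheckEntityClass<E> extends MapFn<E, E> {
  private final Class<?> entityClass;

  CheckEntityClass(Class<?> entityClass) {
    this.entityClass = entityClass;
  }

  @Override
  public E map(E input) {
    if (entityClass.isInstance(input)) {
      return input; // identity: valid records flow through unchanged
    }
    throw new IllegalArgumentException("Expected an instance of "
        + entityClass.getName() + " but got " + input.getClass().getName());
  }
}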