org.apache.cassandra.hadoop.ColumnFamilyInputFormat Java Examples
The following examples show how to use org.apache.cassandra.hadoop.ColumnFamilyInputFormat.
Example #1
Source File: WordCountCounters.java, from stratio-cassandra (Apache License 2.0)
public int run(String[] args) throws Exception
{
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    // Reduce-side output is written to the filesystem.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    // Read the input rows directly from Cassandra over Thrift.
    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);

    // Fetch up to 100 columns per row across the full column range.
    SlicePredicate predicate = new SlicePredicate().setSlice_range(
            new SliceRange()
                    .setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER)
                    .setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
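Example #1 registers a SumMapper but does not include its source. The sketch below is a hypothetical version of such a mapper, shown only to illustrate the map-side contract of ColumnFamilyInputFormat: each map() call receives one row, with the row key as a ByteBuffer and the selected columns as a SortedMap keyed by column name. The IColumn type and the summing logic are assumptions (the column class changed across Cassandra versions), not the original stratio-cassandra code.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.SortedMap;

import org.apache.cassandra.db.IColumn;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper for Example #1 (not the project's actual SumMapper).
// ColumnFamilyInputFormat delivers one row per map() call: the row key as a
// ByteBuffer and the selected columns as a SortedMap keyed by column name.
public static class SumMapper extends Mapper<ByteBuffer, SortedMap<ByteBuffer, IColumn>, Text, LongWritable>
{
    public void map(ByteBuffer key, SortedMap<ByteBuffer, IColumn> columns, Context context)
            throws IOException, InterruptedException
    {
        long sum = 0;
        for (IColumn column : columns.values())
            sum += ByteBufferUtil.toLong(column.value()); // counter columns hold a long value

        context.write(new Text(ByteBufferUtil.string(key)), new LongWritable(sum));
    }
}

The output key and value types of this mapper match the Text/LongWritable classes configured on the job above.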
Example #2
Source File: WordCount.java, from stratio-cassandra (Apache License 2.0)
public int run(String[] args) throws Exception
{
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR))
    {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
    {
        String columnName = "text" + i;

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem"))
        {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        }
        else
        {
            // Write the word counts back into Cassandra instead of the filesystem.
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        // Only read the single column this iteration cares about.
        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        if (i == 4)
        {
            // Restrict the scan with a secondary-index expression on the "int4" column.
            IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("int4"), IndexOperator.EQ, ByteBufferUtil.bytes(0));
            ConfigHelper.setInputRange(job.getConfiguration(), Arrays.asList(expr));
        }

        if (i == 5)
        {
            // this will cause the predicate to be ignored in favor of scanning everything as a wide row
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);
        }

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

        job.waitForCompletion(true);
    }
    return 0;
}
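When outputReducerType is not "filesystem", Example #2 writes its results back to Cassandra through ColumnFamilyOutputFormat, but the ReducerToCassandra class it registers is not shown. Below is a minimal, hypothetical sketch of such a reducer: ColumnFamilyOutputFormat expects a ByteBuffer row key and a List of Thrift Mutation objects per output record, and the column name "sum" here mirrors the CONF_COLUMN_NAME value set on the job above. The class body and helper calls are illustrative, not the project's actual implementation.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;

import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.thrift.Mutation;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical reducer for the Cassandra output path of Example #2.
// ColumnFamilyOutputFormat consumes a ByteBuffer row key plus a list of Thrift Mutations.
public static class ReducerToCassandra extends Reducer<Text, IntWritable, ByteBuffer, List<Mutation>>
{
    public void reduce(Text word, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException
    {
        int sum = 0;
        for (IntWritable val : values)
            sum += val.get();

        // One row per word, carrying a single column named "sum" with the count.
        Column column = new Column();
        column.setName(ByteBufferUtil.bytes("sum"));
        column.setValue(ByteBufferUtil.bytes(sum));
        column.setTimestamp(System.currentTimeMillis());

        Mutation mutation = new Mutation();
        mutation.setColumn_or_supercolumn(new ColumnOrSuperColumn());
        mutation.getColumn_or_supercolumn().setColumn(column);

        context.write(ByteBufferUtil.bytes(word.toString()), Collections.singletonList(mutation));
    }
}

The ByteBuffer/List output types of this reducer correspond to the setOutputKeyClass(ByteBuffer.class) and setOutputValueClass(List.class) calls in the job configuration.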