org.apache.cassandra.hadoop.ColumnFamilySplit Java Examples
The following examples show how to use
org.apache.cassandra.hadoop.ColumnFamilySplit.
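All of the InputFormats below assume that the Hadoop job configuration already describes the Cassandra cluster and column family to read from. The following is a minimal sketch of that setup using the same ConfigHelper calls that appear in Example #8; the class name and every literal value (host, port, partitioner, keyspace, column family, split size) are placeholder assumptions for illustration, not values taken from the projects above.

    import org.apache.cassandra.hadoop.ConfigHelper;
    import org.apache.hadoop.conf.Configuration;

    public class CassandraInputConfigSketch {
        // Builds a Configuration that a Cassandra InputFormat can turn into ColumnFamilySplits.
        // Every literal below is a placeholder, chosen only for the sake of the sketch.
        public static Configuration buildInputConf() {
            Configuration conf = new Configuration();
            ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");         // Cassandra contact point
            ConfigHelper.setInputRpcPort(conf, "9160");                     // Thrift RPC port
            ConfigHelper.setInputPartitioner(conf, "Murmur3Partitioner");   // cluster partitioner
            ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_cf");
            ConfigHelper.setInputSplitSize(conf, 65536);                    // target rows per split
            return conf;
        }
    }

An InputFormat such as those in Examples #1 and #8 then calls getSplits against a configuration like this one and hands each resulting ColumnFamilySplit to a record reader (Examples #2, #3, and #7).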
Example #1
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID());

        // Delegate to the new-API (mapreduce) getSplits and return the same
        // ColumnFamilySplit instances through the old mapred interface.
        List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
        InputSplit[] oldInputSplits = new InputSplit[newInputSplits.size()];
        for (int i = 0; i < newInputSplits.size(); i++) {
            oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
        }
        return oldInputSplits;
    }
Example #2
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
    @Override
    public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
        this.split = (ColumnFamilySplit) split;
        Configuration conf = HadoopCompat.getConfiguration(context);
        totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                ? (int) this.split.getLength()
                : ConfigHelper.getInputSplitSize(conf);
        cfName = ConfigHelper.getInputColumnFamily(conf);
        keyspace = ConfigHelper.getInputKeyspace(conf);
        partitioner = ConfigHelper.getInputPartitioner(conf);
        inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
        userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

        try {
            // create a Cluster instance
            String[] locations = split.getLocations();
            session = getInputSession(locations, conf);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // get negotiated serialization protocol
        nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

        // If the user provides a CQL query then we will use it without validation,
        // otherwise we will fall back to building a query using the:
        //   inputColumns
        //   whereClauses
        cqlQuery = conf.get(INPUT_CQL);

        // validate that the user hasn't tried to give us a custom query along with
        // input columns and where clauses
        if (StringUtils.isNotEmpty(cqlQuery)
                && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
            throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
        }

        if (StringUtils.isEmpty(cqlQuery)) {
            cqlQuery = buildQuery();
        }
        LOG.trace("cqlQuery {}", cqlQuery);

        rowIterator = new RowIterator();
        LOG.trace("created {}", rowIterator);
    }
Example #3
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        this.split = (ColumnFamilySplit) split;
        Configuration conf = HadoopCompat.getConfiguration(context);
        totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                ? (int) this.split.getLength()
                : ConfigHelper.getInputSplitSize(conf);
        cfName = ConfigHelper.getInputColumnFamily(conf);
        keyspace = ConfigHelper.getInputKeyspace(conf);
        partitioner = ConfigHelper.getInputPartitioner(conf);
        inputColumns = CqlConfigHelper.getInputcolumns(conf);
        userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

        try {
            if (cluster != null)
                return;

            // create a Cluster instance
            String[] locations = split.getLocations();
            cluster = CqlConfigHelper.getInputCluster(locations, conf);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        if (cluster != null)
            session = cluster.connect(quote(keyspace));

        if (session == null)
            throw new RuntimeException("Can't create connection session");

        // get negotiated serialization protocol
        nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

        // If the user provides a CQL query then we will use it without validation,
        // otherwise we will fall back to building a query using the:
        //   inputColumns
        //   whereClauses
        cqlQuery = CqlConfigHelper.getInputCql(conf);

        // validate that the user hasn't tried to give us a custom query along with
        // input columns and where clauses
        if (StringUtils.isNotEmpty(cqlQuery)
                && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
            throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
        }

        if (StringUtils.isEmpty(cqlQuery))
            cqlQuery = buildQuery();
        logger.debug("cqlQuery {}", cqlQuery);

        rowIterator = new RowIterator();
        logger.debug("created {}", rowIterator);
    }
Example #4
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public HiveCassandraStandardSplit() {
        super((Path) null, 0, 0, (String[]) null);
        columnMapping = "";
        split = new ColumnFamilySplit(null, null, null);
    }
Example #5
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public HiveCassandraStandardSplit(ColumnFamilySplit split, String columnsMapping, Path dummyPath) {
        super(dummyPath, 0, 0, (String[]) null);
        this.split = split;
        columnMapping = columnsMapping;
    }
Example #6
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public ColumnFamilySplit getSplit() {
        return split;
    }
Example #7
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
    @Override
    public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
            JobConf jobConf, final Reporter reporter) throws IOException {
        HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

        List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
        isTransposed = AbstractColumnSerDe.isTransposed(columns);

        List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

        if (columns.size() < readColIDs.size()) {
            throw new IOException("Cannot read more columns than the given table contains.");
        }

        org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
        Job job = new Job(jobConf);

        TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
            @Override
            public void progress() {
                reporter.progress();
            }
        };

        // Build the slice predicate that limits what the underlying record reader fetches.
        SlicePredicate predicate = new SlicePredicate();

        if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
            SliceRange range = new SliceRange();
            AbstractType comparator = BytesType.instance;

            String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
            if (comparatorType != null && !comparatorType.equals("")) {
                try {
                    comparator = TypeParser.parse(comparatorType);
                } catch (Exception ex) {
                    throw new IOException("Comparator class not found.");
                }
            }

            String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
            String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
            String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

            range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
            range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
            range.setReversed(reversed == null ? false : reversed.equals("true"));
            range.setCount(cassandraSplit.getSlicePredicateSize());
            predicate.setSlice_range(range);
        } else {
            int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
            predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
        }

        try {
            ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
                    cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());
            ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
            ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
            ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
            ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
            ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
            // Set Split Size
            ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

            CassandraHiveRecordReader rr = null;

            if (isTransposed && tac.getConfiguration().getBoolean(AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
                rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
            } else {
                rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
            }
            rr.initialize(cfSplit, tac);

            return rr;
        } catch (Exception ie) {
            throw new IOException(ie);
        }
    }
Example #8
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
    @Override
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        String ks = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
        String cf = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
        int slicePredicateSize = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
                AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
        int sliceRangeSize = jobConf.getInt(
                AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
                AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
        int splitSize = jobConf.getInt(
                AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
                AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
        String cassandraColumnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
        int rpcPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
        String host = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
        String partitioner = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

        if (cassandraColumnMapping == null) {
            throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
        }

        SliceRange range = new SliceRange();
        range.setStart(new byte[0]);
        range.setFinish(new byte[0]);
        range.setReversed(false);
        range.setCount(slicePredicateSize);
        SlicePredicate predicate = new SlicePredicate();
        predicate.setSlice_range(range);

        ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
        ConfigHelper.setInputInitialAddress(jobConf, host);
        ConfigHelper.setInputPartitioner(jobConf, partitioner);
        ConfigHelper.setInputSlicePredicate(jobConf, predicate);
        ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
        ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
        ConfigHelper.setInputSplitSize(jobConf, splitSize);

        Job job = new Job(jobConf);
        JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());
        Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

        List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
        InputSplit[] results = new InputSplit[splits.size()];

        // Wrap each ColumnFamilySplit in a Hive split that carries the column mapping
        // and connection details needed by getRecordReader.
        for (int i = 0; i < splits.size(); ++i) {
            HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
                    (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
            csplit.setKeyspace(ks);
            csplit.setColumnFamily(cf);
            csplit.setRangeBatchSize(sliceRangeSize);
            csplit.setSplitSize(splitSize);
            csplit.setHost(host);
            csplit.setPort(rpcPort);
            csplit.setSlicePredicateSize(slicePredicateSize);
            csplit.setPartitioner(partitioner);
            csplit.setColumnMapping(cassandraColumnMapping);
            results[i] = csplit;
        }
        return results;
    }