Java Code Examples for org.apache.cassandra.hadoop.ConfigHelper#getInputColumnFamily()
The following examples show how to use org.apache.cassandra.hadoop.ConfigHelper#getInputColumnFamily().
You can follow the links above each example to view the original project or source file.
Example 1
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
private void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and table with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}
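For context, validateConfiguration() expects the job Configuration to have been populated with ConfigHelper's setters beforehand. Below is a minimal sketch of that setup; the keyspace, table, contact point, and partitioner values are placeholders, not taken from the examples on this page.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class InputConfigSketch {
    public static Configuration buildConf() {
        Configuration conf = new Configuration();
        // keyspace and table, read back via getInputKeyspace()/getInputColumnFamily()
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_table");
        // contact point, read back via getInputInitialAddress()
        ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");
        // partitioner class name, read back via getInputPartitioner()
        ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");
        return conf;
    }
}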
Example 2
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key), partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token), partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);
        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
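The jobKeyRange branch in Example 2 only comes into play when the job configuration carries a key range. A hedged sketch of restricting a job to a token range via ConfigHelper.setInputRange() is shown below; the token strings are placeholders, and the exact setInputRange overloads vary between Cassandra versions.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class TokenRangeSketch {
    public static void restrictToRange(Configuration conf) {
        // Placeholder start/end tokens. getSplits() reads this back through
        // ConfigHelper.getInputKeyRange(conf) and intersects it with the
        // cluster's canonical token ranges before submitting SplitCallables.
        ConfigHelper.setInputRange(conf, "-9223372036854775808", "0");
    }
}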
Example 3
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {
        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
Example 4
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
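Example 4 reads its column list, where clauses, and optional custom query from CqlConfigHelper. A minimal, hedged sketch of the configuration side is below; the column names, WHERE clause, and query text are placeholders. Note that the reader above rejects a custom CQL query combined with input columns or where clauses, so a job should set one or the other, not both.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class CqlInputConfigSketch {
    public static void configure(Configuration conf) {
        // keyspace and table, read back via getInputKeyspace()/getInputColumnFamily()
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_table");
        // read back by CqlConfigHelper.getInputcolumns(conf)
        CqlConfigHelper.setInputColumns(conf, "id,value");
        // read back by CqlConfigHelper.getInputWhereClauses(conf)
        CqlConfigHelper.setInputWhereClauses(conf, "value > 0");
        // Alternatively, supply a full query instead of columns/where clauses
        // (read back by CqlConfigHelper.getInputCql(conf)):
        // CqlConfigHelper.setInputCql(conf, "SELECT id, value FROM my_keyspace.my_table WHERE token(id) > ? AND token(id) <= ?");
    }
}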