Java Code Examples for org.apache.cassandra.hadoop.ConfigHelper#getInputColumnFamily()
The following examples show how to use org.apache.cassandra.hadoop.ConfigHelper#getInputColumnFamily().
You can follow the links above each example to view the original project or source file.
Example 1
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
private void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and table with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}
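For context, validateConfiguration() expects the job Configuration to have been populated with ConfigHelper's setters beforehand. Below is a minimal sketch of that setup; the keyspace, table, contact point, and partitioner values are placeholders, not taken from the examples on this page.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class InputConfigSketch {
    public static Configuration buildConf() {
        Configuration conf = new Configuration();
        // keyspace and table, read back via getInputKeyspace()/getInputColumnFamily()
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_table");
        // contact point, read back via getInputInitialAddress()
        ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");
        // partitioner class name, read back via getInputPartitioner()
        ConfigHelper.setInputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");
        return conf;
    }
}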
Example 2
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key), partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token), partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);
        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
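The jobKeyRange branch in Example 2 only comes into play when the job configuration carries a key range. A hedged sketch of restricting a job to a token range via ConfigHelper.setInputRange() is shown below; the token strings are placeholders, and the exact setInputRange overloads vary between Cassandra versions.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class TokenRangeSketch {
    public static void restrictToRange(Configuration conf) {
        // Placeholder start/end tokens. getSplits() reads this back through
        // ConfigHelper.getInputKeyRange(conf) and intersects it with the
        // cluster's canonical token ranges before submitting SplitCallables.
        ConfigHelper.setInputRange(conf, "-9223372036854775808", "0");
    }
}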
Example 3
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {
        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
Example 4
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
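Example 4 reads its column list, where clauses, and optional custom query from CqlConfigHelper. A minimal, hedged sketch of the configuration side is below; the column names, WHERE clause, and query text are placeholders. Note that the reader above rejects a custom CQL query combined with input columns or where clauses, so a job should set one or the other, not both.

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class CqlInputConfigSketch {
    public static void configure(Configuration conf) {
        // keyspace and table, read back via getInputKeyspace()/getInputColumnFamily()
        ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_table");
        // read back by CqlConfigHelper.getInputcolumns(conf)
        CqlConfigHelper.setInputColumns(conf, "id,value");
        // read back by CqlConfigHelper.getInputWhereClauses(conf)
        CqlConfigHelper.setInputWhereClauses(conf, "value > 0");
        // Alternatively, supply a full query instead of columns/where clauses
        // (read back by CqlConfigHelper.getInputCql(conf)):
        // CqlConfigHelper.setInputCql(conf, "SELECT id, value FROM my_keyspace.my_table WHERE token(id) > ? AND token(id) <= ?");
    }
}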