org.apache.cassandra.hadoop.ColumnFamilySplit Java Examples
The following examples show how to use
org.apache.cassandra.hadoop.ColumnFamilySplit.
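All of the InputFormats below assume that the Hadoop job configuration already describes the Cassandra cluster and column family to read from. The following is a minimal sketch of that setup using the same ConfigHelper calls that appear in Example #8; the class name and every literal value (host, port, partitioner, keyspace, column family, split size) are placeholder assumptions for illustration, not values taken from the projects above.

    import org.apache.cassandra.hadoop.ConfigHelper;
    import org.apache.hadoop.conf.Configuration;

    public class CassandraInputConfigSketch {
        // Builds a Configuration that a Cassandra InputFormat can turn into ColumnFamilySplits.
        // Every literal below is a placeholder, chosen only for the sake of the sketch.
        public static Configuration buildInputConf() {
            Configuration conf = new Configuration();
            ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");         // Cassandra contact point
            ConfigHelper.setInputRpcPort(conf, "9160");                     // Thrift RPC port
            ConfigHelper.setInputPartitioner(conf, "Murmur3Partitioner");   // cluster partitioner
            ConfigHelper.setInputColumnFamily(conf, "my_keyspace", "my_cf");
            ConfigHelper.setInputSplitSize(conf, 65536);                    // target rows per split
            return conf;
        }
    }

An InputFormat such as those in Examples #1 and #8 then calls getSplits against a configuration like this one and hands each resulting ColumnFamilySplit to a record reader (Examples #2, #3, and #7).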
Example #1
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID());

        // Delegate to the new-API (mapreduce) getSplits and return the same
        // ColumnFamilySplit instances through the old mapred interface.
        List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
        InputSplit[] oldInputSplits = new InputSplit[newInputSplits.size()];
        for (int i = 0; i < newInputSplits.size(); i++) {
            oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
        }
        return oldInputSplits;
    }
Example #2
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0
    @Override
    public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
        this.split = (ColumnFamilySplit) split;
        Configuration conf = HadoopCompat.getConfiguration(context);
        totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                ? (int) this.split.getLength()
                : ConfigHelper.getInputSplitSize(conf);
        cfName = ConfigHelper.getInputColumnFamily(conf);
        keyspace = ConfigHelper.getInputKeyspace(conf);
        partitioner = ConfigHelper.getInputPartitioner(conf);
        inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
        userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

        try {
            // create a Cluster instance
            String[] locations = split.getLocations();
            session = getInputSession(locations, conf);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // get negotiated serialization protocol
        nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

        // If the user provides a CQL query then we will use it without validation,
        // otherwise we will fall back to building a query using the:
        //   inputColumns
        //   whereClauses
        cqlQuery = conf.get(INPUT_CQL);

        // validate that the user hasn't tried to give us a custom query along with
        // input columns and where clauses
        if (StringUtils.isNotEmpty(cqlQuery)
                && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
            throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
        }

        if (StringUtils.isEmpty(cqlQuery)) {
            cqlQuery = buildQuery();
        }
        LOG.trace("cqlQuery {}", cqlQuery);

        rowIterator = new RowIterator();
        LOG.trace("created {}", rowIterator);
    }
Example #3
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        this.split = (ColumnFamilySplit) split;
        Configuration conf = HadoopCompat.getConfiguration(context);
        totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                ? (int) this.split.getLength()
                : ConfigHelper.getInputSplitSize(conf);
        cfName = ConfigHelper.getInputColumnFamily(conf);
        keyspace = ConfigHelper.getInputKeyspace(conf);
        partitioner = ConfigHelper.getInputPartitioner(conf);
        inputColumns = CqlConfigHelper.getInputcolumns(conf);
        userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

        try {
            if (cluster != null)
                return;

            // create a Cluster instance
            String[] locations = split.getLocations();
            cluster = CqlConfigHelper.getInputCluster(locations, conf);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        if (cluster != null)
            session = cluster.connect(quote(keyspace));

        if (session == null)
            throw new RuntimeException("Can't create connection session");

        // get negotiated serialization protocol
        nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

        // If the user provides a CQL query then we will use it without validation,
        // otherwise we will fall back to building a query using the:
        //   inputColumns
        //   whereClauses
        cqlQuery = CqlConfigHelper.getInputCql(conf);

        // validate that the user hasn't tried to give us a custom query along with
        // input columns and where clauses
        if (StringUtils.isNotEmpty(cqlQuery)
                && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
            throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
        }

        if (StringUtils.isEmpty(cqlQuery))
            cqlQuery = buildQuery();
        logger.debug("cqlQuery {}", cqlQuery);

        rowIterator = new RowIterator();
        logger.debug("created {}", rowIterator);
    }
Example #4
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public HiveCassandraStandardSplit() {
        super((Path) null, 0, 0, (String[]) null);
        columnMapping = "";
        split = new ColumnFamilySplit(null, null, null);
    }
Example #5
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public HiveCassandraStandardSplit(ColumnFamilySplit split, String columnsMapping, Path dummyPath) {
        super(dummyPath, 0, 0, (String[]) null);
        this.split = split;
        columnMapping = columnsMapping;
    }
Example #6
Source File: HiveCassandraStandardSplit.java From Hive-Cassandra with Apache License 2.0
    public ColumnFamilySplit getSplit() {
        return split;
    }
Example #7
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
    @Override
    public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
            JobConf jobConf, final Reporter reporter) throws IOException {
        HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

        List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
        isTransposed = AbstractColumnSerDe.isTransposed(columns);

        List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

        if (columns.size() < readColIDs.size()) {
            throw new IOException("Cannot read more columns than the given table contains.");
        }

        org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
        Job job = new Job(jobConf);

        TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
            @Override
            public void progress() {
                reporter.progress();
            }
        };

        // Build the slice predicate that limits what the underlying record reader fetches.
        SlicePredicate predicate = new SlicePredicate();

        if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
            SliceRange range = new SliceRange();
            AbstractType comparator = BytesType.instance;

            String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
            if (comparatorType != null && !comparatorType.equals("")) {
                try {
                    comparator = TypeParser.parse(comparatorType);
                } catch (Exception ex) {
                    throw new IOException("Comparator class not found.");
                }
            }

            String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
            String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
            String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

            range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
            range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
            range.setReversed(reversed == null ? false : reversed.equals("true"));
            range.setCount(cassandraSplit.getSlicePredicateSize());
            predicate.setSlice_range(range);
        } else {
            int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
            predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
        }

        try {
            ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
                    cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());
            ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
            ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
            ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
            ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
            ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
            // Set Split Size
            ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

            CassandraHiveRecordReader rr = null;

            if (isTransposed && tac.getConfiguration().getBoolean(AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
                rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
            } else {
                rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
            }
            rr.initialize(cfSplit, tac);

            return rr;
        } catch (Exception ie) {
            throw new IOException(ie);
        }
    }
Example #8
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
    @Override
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        String ks = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
        String cf = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
        int slicePredicateSize = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
                AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
        int sliceRangeSize = jobConf.getInt(
                AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
                AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
        int splitSize = jobConf.getInt(
                AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
                AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
        String cassandraColumnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
        int rpcPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
        String host = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
        String partitioner = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

        if (cassandraColumnMapping == null) {
            throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
        }

        SliceRange range = new SliceRange();
        range.setStart(new byte[0]);
        range.setFinish(new byte[0]);
        range.setReversed(false);
        range.setCount(slicePredicateSize);
        SlicePredicate predicate = new SlicePredicate();
        predicate.setSlice_range(range);

        ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
        ConfigHelper.setInputInitialAddress(jobConf, host);
        ConfigHelper.setInputPartitioner(jobConf, partitioner);
        ConfigHelper.setInputSlicePredicate(jobConf, predicate);
        ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
        ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
        ConfigHelper.setInputSplitSize(jobConf, splitSize);

        Job job = new Job(jobConf);
        JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());
        Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

        List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
        InputSplit[] results = new InputSplit[splits.size()];

        // Wrap each ColumnFamilySplit in a Hive split that carries the column mapping
        // and connection details needed by getRecordReader.
        for (int i = 0; i < splits.size(); ++i) {
            HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
                    (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
            csplit.setKeyspace(ks);
            csplit.setColumnFamily(cf);
            csplit.setRangeBatchSize(sliceRangeSize);
            csplit.setSplitSize(splitSize);
            csplit.setHost(host);
            csplit.setPort(rpcPort);
            csplit.setSlicePredicateSize(slicePredicateSize);
            csplit.setPartitioner(partitioner);
            csplit.setColumnMapping(cassandraColumnMapping);
            results[i] = csplit;
        }
        return results;
    }