Java Code Examples for org.apache.hadoop.hbase.client.Scan#setCacheBlocks()
The following examples show how to use org.apache.hadoop.hbase.client.Scan#setCacheBlocks(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
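Before the examples, here is a minimal, self-contained sketch of the typical usage pattern. It is not taken from any project below; the table name "example_table", column family "cf", and caching value are placeholders. Block caching is normally left enabled for small, repeated scans that benefit from the RegionServer block cache, and disabled (as here) for one-off full-table scans such as MapReduce jobs, which would otherwise churn the cache.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SetCacheBlocksSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf("example_table"))) {
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("cf"));
      scan.setCaching(500);        // rows fetched per RPC
      scan.setCacheBlocks(false);  // full scan: don't pollute the block cache
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result result : scanner) {
          System.out.println(Bytes.toStringBinary(result.getRow()));
        }
      }
    }
  }
}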
Example 1
Source File: CubeHBaseRPC.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public static Scan buildScan(RawScan rawScan) {
  Scan scan = new Scan();
  scan.setCaching(rawScan.hbaseCaching);
  scan.setMaxResultSize(rawScan.hbaseMaxResultSize);
  scan.setCacheBlocks(true);
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));

  if (rawScan.startKey != null) {
    scan.setStartRow(rawScan.startKey);
  }
  if (rawScan.endKey != null) {
    scan.setStopRow(rawScan.endKey);
  }
  if (rawScan.fuzzyKeys != null) {
    applyFuzzyFilter(scan, rawScan.fuzzyKeys);
  }
  if (rawScan.hbaseColumns != null) {
    applyHBaseColums(scan, rawScan.hbaseColumns);
  }

  return scan;
}
Example 2
Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0 | 6 votes |
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
  Scan scan = new Scan();
  if (cf != null) {
    scan.addFamily(Bytes.toBytes(cf));
  }
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      sourceTable,        // input table
      scan,               // Scan instance to control CF and attribute selection
      PrunerMapper.class, // mapper class
      null,               // mapper output key
      null,               // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      sourceTable, // output table
      null,        // reducer class
      job);
}
Example 3
Source File: TestSeekBeforeWithReverseScan.java From hbase with Apache License 2.0 | 5 votes |
@Test
public void testReverseScanWithoutPadding() throws Exception {
  byte[] row1 = Bytes.toBytes("a");
  byte[] row2 = Bytes.toBytes("ab");
  byte[] row3 = Bytes.toBytes("b");
  Put put1 = new Put(row1);
  put1.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
  Put put2 = new Put(row2);
  put2.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
  Put put3 = new Put(row3);
  put3.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);

  region.put(put1);
  region.put(put2);
  region.put(put3);
  region.flush(true);

  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  scan.setReversed(true);
  scan.setFilter(new FirstKeyOnlyFilter());
  scan.addFamily(cfName);
  RegionScanner scanner = region.getScanner(scan);
  List<Cell> res = new ArrayList<>();
  int count = 1;
  while (scanner.next(res)) {
    count++;
  }
  assertEquals("b", Bytes.toString(res.get(0).getRowArray(), res.get(0).getRowOffset(),
      res.get(0).getRowLength()));
  assertEquals("ab", Bytes.toString(res.get(1).getRowArray(), res.get(1).getRowOffset(),
      res.get(1).getRowLength()));
  assertEquals("a", Bytes.toString(res.get(2).getRowArray(), res.get(2).getRowOffset(),
      res.get(2).getRowLength()));
  assertEquals(3, count);
}
Example 4
Source File: SimpleHBaseStore.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
Reader() throws IOException {
  Connection conn = HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl());
  table = conn.getTable(htableName);

  Scan scan = new Scan();
  scan.addFamily(CF_B);
  scan.setCaching(1024);
  scan.setCacheBlocks(true);
  scanner = table.getScanner(scan);
}
Example 5
Source File: PrepareClusterJob.java From recsys-offline with Apache License 2.0 | 5 votes |
public void run() {
  try {
    Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
    job.setJarByClass(PrepareClusterJob.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_yearrate.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_repaylimittime.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_progress.getBytes());

    Filter filter = new SingleColumnValueFilter(Bytes.toBytes(Constants.hbase_column_family),
        Bytes.toBytes(Constants.hbase_column_progress), CompareOp.NOT_EQUAL,
        Bytes.toBytes("100"));
    scan.setFilter(filter);

    TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table, scan,
        HBaseReadMapper.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob(Constants.hbase_cluster_model_table,
        HBaseWriteReducer.class, job);

    job.setNumReduceTasks(1);

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example 6
Source File: CubeSegmentTupleIterator.java From Kylin with Apache License 2.0 | 5 votes |
private Scan buildScan(HBaseKeyRange keyRange) {
  Scan scan = new Scan();
  scan.setCaching(SCAN_CACHE);
  scan.setCacheBlocks(true);
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));
  for (RowValueDecoder valueDecoder : this.rowValueDecoders) {
    HBaseColumnDesc hbaseColumn = valueDecoder.getHBaseColumn();
    byte[] byteFamily = Bytes.toBytes(hbaseColumn.getColumnFamilyName());
    byte[] byteQualifier = Bytes.toBytes(hbaseColumn.getQualifier());
    scan.addColumn(byteFamily, byteQualifier);
  }
  scan.setStartRow(keyRange.getStartKey());
  scan.setStopRow(keyRange.getStopKey());
  return scan;
}
Example 7
Source File: HBaseLogReader.java From eagle with Apache License 2.0 | 5 votes |
/**
 * TODO If the required field is null for a row, then this row will not be fetched. That could be a
 * problem for counting. Need another version of read to strictly get the number of rows, which will
 * return all the columns for a column family.
 */
@Override
public void open() throws IOException {
  if (isOpen) {
    return; // silently return
  }
  try {
    tbl = EagleConfigFactory.load().getHTable(schema.getTable());
  } catch (RuntimeException ex) {
    throw new IOException(ex);
  }

  String rowkeyRegex = buildRegex2(searchTags);
  RegexStringComparator regexStringComparator = new RegexStringComparator(rowkeyRegex);
  regexStringComparator.setCharset(Charset.forName("ISO-8859-1"));
  RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator);
  FilterList filterList = new FilterList();
  filterList.addFilter(filter);
  Scan s1 = new Scan();
  // reverse timestamp, startRow is stopKey, and stopRow is startKey
  s1.setStartRow(stopKey);
  s1.setStopRow(startKey);
  s1.setFilter(filterList);
  // TODO the # of cached rows should be minimum of (pagesize and 100)
  s1.setCaching(100);
  // TODO not optimized for all applications
  s1.setCacheBlocks(true);
  // scan specified columnfamily and qualifiers
  for (byte[] qualifier : qualifiers) {
    s1.addColumn(schema.getColumnFamily().getBytes(), qualifier);
  }
  rs = tbl.getScanner(s1);
  isOpen = true;
}
Example 8
Source File: MetaDataClient.java From phoenix with Apache License 2.0 | 4 votes |
private long updateStatisticsInternal(PName physicalName, PTable logicalTable) throws SQLException {
  ReadOnlyProps props = connection.getQueryServices().getProps();
  final long msMinBetweenUpdates = props
      .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB,
          props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB,
              QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2);
  byte[] tenantIdBytes = ByteUtil.EMPTY_BYTE_ARRAY;
  Long scn = connection.getSCN();
  // Always invalidate the cache
  long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP : scn;
  String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM "
      + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + " WHERE " + PHYSICAL_NAME + "='"
      + physicalName.getString() + "' AND " + COLUMN_FAMILY + " IS NULL AND " + REGION_NAME
      + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL";
  ResultSet rs = connection.createStatement().executeQuery(query);
  long msSinceLastUpdate = Long.MAX_VALUE;
  if (rs.next()) {
    msSinceLastUpdate = rs.getLong(1) - rs.getLong(2);
  }
  long rowCount = 0;
  if (msSinceLastUpdate >= msMinBetweenUpdates) {
    /*
     * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable passed
     * through, since it may not represent a "real" table in the case of the view indexes of a
     * base table.
     */
    PostDDLCompiler compiler = new PostDDLCompiler(connection);
    TableRef tableRef = new TableRef(null, logicalTable, clientTimeStamp, false);
    MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, null, null,
        clientTimeStamp);
    Scan scan = plan.getContext().getScan();
    scan.setCacheBlocks(false);
    scan.setAttribute(BaseScannerRegionObserver.ANALYZE_TABLE, PDataType.TRUE_BYTES);
    MutationState mutationState = plan.execute();
    rowCount = mutationState.getUpdateCount();
  }
  /*
   * Update the stats table so that client will pull the new one with the updated stats.
   * Even if we don't run the command due to the last update time, invalidate the cache.
   * This supports scenarios in which a major compaction was manually initiated and the
   * client wants the modified stats to be reflected immediately.
   */
  connection.getQueryServices().clearTableFromCache(tenantIdBytes,
      Bytes.toBytes(SchemaUtil.getSchemaNameFromFullName(physicalName.getString())),
      Bytes.toBytes(SchemaUtil.getTableNameFromFullName(physicalName.getString())),
      clientTimeStamp);
  return rowCount;
}
Example 9
Source File: ThriftHBaseServiceHandler.java From hbase with Apache License 2.0 | 4 votes |
@Override
public int scannerOpenWithScan(ByteBuffer tableName, TScan tScan,
    Map<ByteBuffer, ByteBuffer> attributes) throws IOError {
  Table table = null;
  try {
    table = getTable(tableName);
    Scan scan = new Scan();
    addAttributes(scan, attributes);
    if (tScan.isSetStartRow()) {
      scan.withStartRow(tScan.getStartRow());
    }
    if (tScan.isSetStopRow()) {
      scan.withStopRow(tScan.getStopRow());
    }
    if (tScan.isSetTimestamp()) {
      scan.setTimeRange(0, tScan.getTimestamp());
    }
    if (tScan.isSetCaching()) {
      scan.setCaching(tScan.getCaching());
    }
    if (tScan.isSetBatchSize()) {
      scan.setBatch(tScan.getBatchSize());
    }
    if (tScan.isSetColumns() && !tScan.getColumns().isEmpty()) {
      for (ByteBuffer column : tScan.getColumns()) {
        byte[][] famQf = CellUtil.parseColumn(getBytes(column));
        if (famQf.length == 1) {
          scan.addFamily(famQf[0]);
        } else {
          scan.addColumn(famQf[0], famQf[1]);
        }
      }
    }
    if (tScan.isSetFilterString()) {
      ParseFilter parseFilter = new ParseFilter();
      scan.setFilter(parseFilter.parseFilterString(tScan.getFilterString()));
    }
    if (tScan.isSetReversed()) {
      scan.setReversed(tScan.isReversed());
    }
    if (tScan.isSetCacheBlocks()) {
      scan.setCacheBlocks(tScan.isCacheBlocks());
    }
    return addScanner(table.getScanner(scan), tScan.sortColumns);
  } catch (IOException e) {
    LOG.warn(e.getMessage(), e);
    throw getIOError(e);
  } finally {
    closeTable(table);
  }
}
Example 10
Source File: ExportHBaseTableToAvro.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);

  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();
  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 11
Source File: HBaseStorage.java From spork with Apache License 2.0 | 4 votes |
private void initScan() throws IOException {
  scan = new Scan();

  scan.setCacheBlocks(cacheBlocks_);
  scan.setCaching(caching_);

  // Set filters, if any.
  if (configuredOptions_.hasOption("gt")) {
    gt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gt")));
    addRowFilter(CompareOp.GREATER, gt_);
    scan.setStartRow(gt_);
  }
  if (configuredOptions_.hasOption("lt")) {
    lt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lt")));
    addRowFilter(CompareOp.LESS, lt_);
    scan.setStopRow(lt_);
  }
  if (configuredOptions_.hasOption("gte")) {
    gte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gte")));
    scan.setStartRow(gte_);
  }
  if (configuredOptions_.hasOption("lte")) {
    lte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lte")));
    byte[] lt = increment(lte_);
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Incrementing lte value of %s from bytes %s to %s to set stop row",
          Bytes.toString(lte_), toString(lte_), toString(lt)));
    }
    if (lt != null) {
      scan.setStopRow(increment(lte_));
    }
    // The WhileMatchFilter will short-circuit the scan after we no longer match. The
    // setStopRow call will limit the number of regions we need to scan
    addFilter(new WhileMatchFilter(new RowFilter(CompareOp.LESS_OR_EQUAL,
        new BinaryComparator(lte_))));
  }
  if (configuredOptions_.hasOption("regex")) {
    regex_ = Utils.slashisize(configuredOptions_.getOptionValue("regex"));
    addFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regex_)));
  }
  if (configuredOptions_.hasOption("minTimestamp") || configuredOptions_.hasOption("maxTimestamp")) {
    scan.setTimeRange(minTimestamp_, maxTimestamp_);
  }
  if (configuredOptions_.hasOption("timestamp")) {
    scan.setTimeStamp(timestamp_);
  }

  // if the group of columnInfos for this family doesn't contain a prefix, we don't need
  // to set any filters, we can just call addColumn or addFamily. See javadocs below.
  boolean columnPrefixExists = false;
  for (ColumnInfo columnInfo : columnInfo_) {
    if (columnInfo.getColumnPrefix() != null) {
      columnPrefixExists = true;
      break;
    }
  }

  if (!columnPrefixExists) {
    addFiltersWithoutColumnPrefix(columnInfo_);
  } else {
    addFiltersWithColumnPrefix(columnInfo_);
  }
}
Example 12
Source File: JobHistoryService.java From hraven with Apache License 2.0 | 4 votes |
/**
 * Removes the job's row from the job_history table, and all related task rows
 * from the job_history_task table.
 * @param key the job to be removed
 * @return the number of rows deleted.
 * @throws IOException
 */
public int removeJob(JobKey key) throws IOException {
  byte[] jobRow = jobKeyConv.toBytes(key);

  Table historyTable = hbaseConnection.getTable(TableName.valueOf(Constants.HISTORY_TABLE));
  historyTable.delete(new Delete(jobRow));
  historyTable.close();

  int deleteCount = 1;

  // delete all task rows
  Scan taskScan = getTaskScan(key);
  // only need the row keys back to delete (all should have taskid)
  taskScan.addColumn(Constants.INFO_FAM_BYTES,
      JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.TASKID));
  // no reason to cache rows we're deleting
  taskScan.setCacheBlocks(false);

  List<Delete> taskDeletes = new ArrayList<Delete>();
  Table taskTable = hbaseConnection.getTable(TableName.valueOf(Constants.HISTORY_TASK_TABLE));
  ResultScanner scanner = taskTable.getScanner(taskScan);
  try {
    for (Result r : scanner) {
      if (r != null && !r.isEmpty()) {
        byte[] rowKey = r.getRow();
        TaskKey taskKey = taskKeyConv.fromBytes(rowKey);
        if (!key.equals(taskKey)) {
          LOG.warn("Found task not in the current job " + Bytes.toStringBinary(rowKey));
          break;
        }
        taskDeletes.add(new Delete(r.getRow()));
      }
    }
    // Hang on the count because delete will modify our list.
    deleteCount += taskDeletes.size();
    if (taskDeletes.size() > 0) {
      LOG.info("Deleting " + taskDeletes.size() + " tasks for job " + key);
      taskTable.delete(taskDeletes);
    }
  } finally {
    scanner.close();
    taskTable.close();
  }
  return deleteCount;
}
Example 13
Source File: ExportHBaseTableToParquet.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();
  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 14
Source File: AggregationEndpoint.java From geowave with Apache License 2.0 | 4 votes |
private Object getValue(
    final Aggregation aggregation,
    final Filter filter,
    final DataTypeAdapter dataAdapter,
    final Short internalAdapterId,
    final HBaseDistributableFilter hdFilter,
    final boolean blockCaching,
    final int scanCacheSize,
    final String[] authorizations) throws IOException {
  final Scan scan = new Scan();
  scan.setMaxVersions(1);
  scan.setCacheBlocks(blockCaching);

  if (scanCacheSize != HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING) {
    scan.setCaching(scanCacheSize);
  }

  if (filter != null) {
    scan.setFilter(filter);
  }

  if (internalAdapterId != null) {
    scan.addFamily(StringUtils.stringToBinary(ByteArrayUtils.shortToString(internalAdapterId)));
  }

  if (authorizations != null) {
    scan.setAuthorizations(new Authorizations(authorizations));
  }
  env.getRegion().getCoprocessorHost().preScannerOpen(scan);

  try (InternalScanner scanner = env.getRegion().getScanner(scan)) {
    final List<Cell> results = new ArrayList<>();
    boolean hasNext;
    do {
      hasNext = scanner.next(results);
      if (!results.isEmpty()) {
        if (hdFilter != null) {
          if (dataAdapter != null) {
            final Object row = hdFilter.decodeRow(dataAdapter);
            if (row != null) {
              aggregation.aggregate(row);
            } else {
              LOGGER.error("DataAdapter failed to decode row");
            }
          } else {
            aggregation.aggregate(hdFilter.getPersistenceEncoding());
          }
        } else {
          aggregation.aggregate(null);
        }
        results.clear();
      }
    } while (hasNext);
  }
  return aggregation.getResult();
}
Example 15
Source File: TableInputFormat.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Sets up a {@link Scan} instance, applying settings from the configuration property
 * constants defined in {@code TableInputFormat}. This allows specifying things such as:
 * <ul>
 *   <li>start and stop rows</li>
 *   <li>column qualifiers or families</li>
 *   <li>timestamps or timerange</li>
 *   <li>scanner caching and batch size</li>
 * </ul>
 */
public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
  Scan scan = new Scan();

  if (conf.get(SCAN_ROW_START) != null) {
    scan.withStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
  }

  if (conf.get(SCAN_ROW_STOP) != null) {
    scan.withStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
  }

  if (conf.get(SCAN_COLUMNS) != null) {
    addColumns(scan, conf.get(SCAN_COLUMNS));
  }

  for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
    scan.addFamily(Bytes.toBytes(columnFamily));
  }

  if (conf.get(SCAN_TIMESTAMP) != null) {
    scan.setTimestamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
  }

  if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
    scan.setTimeRange(
        Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
        Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
  }

  if (conf.get(SCAN_MAXVERSIONS) != null) {
    scan.readVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
  }

  if (conf.get(SCAN_CACHEDROWS) != null) {
    scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
  }

  if (conf.get(SCAN_BATCHSIZE) != null) {
    scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
  }

  // false by default, full table scans generate too much BC churn
  scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
  return scan;
}
Example 16
Source File: MobRefReporter.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Main method for the tool.
 * @return 0 if success, 1 for bad args, 2 if job aborted with an exception,
 *         3 if mr job was unsuccessful
 */
public int run(String[] args) throws IOException, InterruptedException {
  // TODO make family and table optional
  if (args.length != 3) {
    printUsage();
    return 1;
  }
  final String output = args[0];
  final String tableName = args[1];
  final String familyName = args[2];
  final long reportStartTime = EnvironmentEdgeManager.currentTime();
  Configuration conf = getConf();
  try {
    FileSystem fs = FileSystem.get(conf);
    // check whether the current user is the same one with the owner of hbase root
    String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
    FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
    if (hbaseRootFileStat.length > 0) {
      String owner = hbaseRootFileStat[0].getOwner();
      if (!owner.equals(currentUserName)) {
        String errorMsg = "The current user[" + currentUserName
            + "] does not have hbase root credentials."
            + " If this job fails due to an inability to read HBase's internal directories, "
            + "you will need to rerun as a user with sufficient permissions. The HBase superuser "
            + "is a safe choice.";
        LOG.warn(errorMsg);
      }
    } else {
      LOG.error("The passed configs point to an HBase dir does not exist: {}",
          conf.get(HConstants.HBASE_DIR));
      throw new IOException("The target HBase does not exist");
    }

    byte[] family;
    int maxVersions;
    TableName tn = TableName.valueOf(tableName);
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin()) {
      TableDescriptor htd = admin.getDescriptor(tn);
      ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
      if (hcd == null || !hcd.isMobEnabled()) {
        throw new IOException("Column family " + familyName + " is not a MOB column family");
      }
      family = hcd.getName();
      maxVersions = hcd.getMaxVersions();
    }

    String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
    Job job = null;
    Scan scan = new Scan();
    scan.addFamily(family);
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    // If a scanner caching value isn't set, pick a smaller default since we know we're doing
    // a full table scan and don't want to impact other clients badly.
    scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
    scan.setCacheBlocks(false);
    scan.readVersions(maxVersions);
    conf.set(REPORT_JOB_ID, id);

    job = Job.getInstance(conf);
    job.setJarByClass(getClass());
    TableMapReduceUtil.initTableMapperJob(tn, scan, MobRefMapper.class, Text.class,
        ImmutableBytesWritable.class, job);

    job.setReducerClass(MobRefReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(output));

    job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
    // for use in the reducer. easier than re-parsing it out of the scan string.
    job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);

    // Use when we start this job as the base point for file "recency".
    job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);

    if (job.waitForCompletion(true)) {
      LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
    } else {
      System.err.println("Job was not successful");
      return 3;
    }
    return 0;
  } catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
    System.err.println("Job aborted due to exception " + e);
    return 2; // job failed
  }
}
Example 17
Source File: HBase_1_1_2_ClientService.java From nifi with Apache License 2.0 | 4 votes |
protected ResultScanner getResults(final Table table, final String startRow, final String endRow,
    final String filterExpression, final Long timerangeMin, final Long timerangeMax,
    final Integer limitRows, final Boolean isReversed, final Boolean blockCache,
    final Collection<Column> columns, List<String> authorizations) throws IOException {
  final Scan scan = new Scan();
  if (!StringUtils.isBlank(startRow)) {
    scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
  }
  if (!StringUtils.isBlank(endRow)) {
    scan.setStopRow(endRow.getBytes(StandardCharsets.UTF_8));
  }

  if (authorizations != null && authorizations.size() > 0) {
    scan.setAuthorizations(new Authorizations(authorizations));
  }

  Filter filter = null;
  if (columns != null) {
    for (Column col : columns) {
      if (col.getQualifier() == null) {
        scan.addFamily(col.getFamily());
      } else {
        scan.addColumn(col.getFamily(), col.getQualifier());
      }
    }
  }

  if (!StringUtils.isBlank(filterExpression)) {
    ParseFilter parseFilter = new ParseFilter();
    filter = parseFilter.parseFilterString(filterExpression);
  }

  if (filter != null) {
    scan.setFilter(filter);
  }

  if (timerangeMin != null && timerangeMax != null) {
    scan.setTimeRange(timerangeMin, timerangeMax);
  }

  // ->>> reserved for HBase v 2 or later
  //if (limitRows != null && limitRows > 0){
  //  scan.setLimit(limitRows)
  //}

  if (isReversed != null) {
    scan.setReversed(isReversed);
  }

  scan.setCacheBlocks(blockCache);

  return table.getScanner(scan);
}
Example 18
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0 | 4 votes |
public int run(Path outputDir, int numReducers) throws Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

  job = Job.getInstance(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  if (isMultiUnevenColumnFamilies(getConf())) {
    scan.addColumn(BIG_FAMILY_NAME, BIG_FAMILY_NAME);
    scan.addColumn(TINY_FAMILY_NAME, TINY_FAMILY_NAME);
  }

  TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  boolean success = job.waitForCompletion(true);

  if (success) {
    Counters counters = job.getCounters();
    if (null == counters) {
      LOG.warn("Counters were null, cannot verify Job completion."
          + " This is commonly a result of insufficient YARN configuration.");
      // We don't have access to the counters to know if we have "bad" counts
      return 0;
    }

    // If we find no unexpected values, the job didn't outright fail
    if (verifyUnexpectedValues(counters)) {
      // We didn't check referenced+unreferenced counts, leave that to visual inspection
      return 0;
    }
  }

  // We failed
  return 1;
}
Example 19
Source File: IntegrationTestBulkLoad.java From hbase with Apache License 2.0 | 4 votes |
/**
 * After adding data to the table start a mr job to
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void runCheck() throws IOException, ClassNotFoundException, InterruptedException {
  LOG.info("Running check");
  Configuration conf = getConf();
  String jobName = getTablename() + "_check" + EnvironmentEdgeManager.currentTime();
  Path p = util.getDataTestDirOnTestFS(jobName);

  Job job = new Job(conf);
  job.setJarByClass(getClass());
  job.setJobName(jobName);

  job.setPartitionerClass(NaturalKeyPartitioner.class);
  job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
  job.setSortComparatorClass(CompositeKeyComparator.class);

  Scan scan = new Scan();
  scan.addFamily(CHAIN_FAM);
  scan.addFamily(SORT_FAM);
  scan.readVersions(1);
  scan.setCacheBlocks(false);
  scan.setBatch(1000);

  int replicaCount = conf.getInt(NUM_REPLICA_COUNT_KEY, NUM_REPLICA_COUNT_DEFAULT);
  if (replicaCount != NUM_REPLICA_COUNT_DEFAULT) {
    scan.setConsistency(Consistency.TIMELINE);
  }

  TableMapReduceUtil.initTableMapperJob(
      getTablename().getName(),
      scan,
      LinkedListCheckingMapper.class,
      LinkKey.class,
      LinkChain.class,
      job
  );

  job.setReducerClass(LinkedListCheckingReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, p);

  assertEquals(true, job.waitForCompletion(true));

  // Delete the files.
  util.getTestFileSystem().delete(p, true);
}
Example 20
Source File: RowCounter.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 * @deprecated as of release 2.3.0. Will be removed on 4.0.0. Please use main method instead.
 */
@Deprecated
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  List<MultiRowRangeFilter.RowRange> rowRangeList = null;
  long startTime = 0;
  long endTime = 0;

  StringBuilder sb = new StringBuilder();

  final String rangeSwitch = "--range=";
  final String startTimeArgKey = "--starttime=";
  final String endTimeArgKey = "--endtime=";
  final String expectedCountArg = "--expected-count=";

  // First argument is table name, starting from second
  for (int i = 1; i < args.length; i++) {
    if (args[i].startsWith(rangeSwitch)) {
      try {
        rowRangeList = parseRowRangeParameter(
            args[i].substring(args[1].indexOf(rangeSwitch) + rangeSwitch.length()));
      } catch (IllegalArgumentException e) {
        return null;
      }
      continue;
    }
    if (args[i].startsWith(startTimeArgKey)) {
      startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
      continue;
    }
    if (args[i].startsWith(endTimeArgKey)) {
      endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
      continue;
    }
    if (args[i].startsWith(expectedCountArg)) {
      conf.setLong(EXPECTED_COUNT_KEY,
          Long.parseLong(args[i].substring(expectedCountArg.length())));
      continue;
    }
    // if no switch, assume column names
    sb.append(args[i]);
    sb.append(" ");
  }
  if (endTime < startTime) {
    printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
    return null;
  }

  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(RowCounter.class);
  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  setScanFilter(scan, rowRangeList);

  if (sb.length() > 0) {
    for (String columnName : sb.toString().trim().split(" ")) {
      String family = StringUtils.substringBefore(columnName, ":");
      String qualifier = StringUtils.substringAfter(columnName, ":");
      if (StringUtils.isBlank(qualifier)) {
        scan.addFamily(Bytes.toBytes(family));
      } else {
        scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
      }
    }
  }

  scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}