Java Code Examples for org.apache.hadoop.hbase.client.Scan#setCacheBlocks()
The following examples show how to use org.apache.hadoop.hbase.client.Scan#setCacheBlocks(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
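Before the examples, here is a minimal, self-contained sketch of the typical usage pattern. It is not taken from any project below; the table name "example_table", column family "cf", and caching value are placeholders. Block caching is normally left enabled for small, repeated scans that benefit from the RegionServer block cache, and disabled (as here) for one-off full-table scans such as MapReduce jobs, which would otherwise churn the cache.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SetCacheBlocksSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf("example_table"))) {
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("cf"));
      scan.setCaching(500);        // rows fetched per RPC
      scan.setCacheBlocks(false);  // full scan: don't pollute the block cache
      try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result result : scanner) {
          System.out.println(Bytes.toStringBinary(result.getRow()));
        }
      }
    }
  }
}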
Example 1
Source File: CubeHBaseRPC.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public static Scan buildScan(RawScan rawScan) {
  Scan scan = new Scan();
  scan.setCaching(rawScan.hbaseCaching);
  scan.setMaxResultSize(rawScan.hbaseMaxResultSize);
  scan.setCacheBlocks(true);
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));

  if (rawScan.startKey != null) {
    scan.setStartRow(rawScan.startKey);
  }
  if (rawScan.endKey != null) {
    scan.setStopRow(rawScan.endKey);
  }
  if (rawScan.fuzzyKeys != null) {
    applyFuzzyFilter(scan, rawScan.fuzzyKeys);
  }
  if (rawScan.hbaseColumns != null) {
    applyHBaseColums(scan, rawScan.hbaseColumns);
  }

  return scan;
}
Example 2
Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0 | 6 votes |
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
  Scan scan = new Scan();
  if (cf != null) {
    scan.addFamily(Bytes.toBytes(cf));
  }
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      sourceTable,        // input table
      scan,               // Scan instance to control CF and attribute selection
      PrunerMapper.class, // mapper class
      null,               // mapper output key
      null,               // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      sourceTable, // output table
      null,        // reducer class
      job);
}
Example 3
Source File: TestSeekBeforeWithReverseScan.java From hbase with Apache License 2.0 | 5 votes |
@Test
public void testReverseScanWithoutPadding() throws Exception {
  byte[] row1 = Bytes.toBytes("a");
  byte[] row2 = Bytes.toBytes("ab");
  byte[] row3 = Bytes.toBytes("b");
  Put put1 = new Put(row1);
  put1.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
  Put put2 = new Put(row2);
  put2.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
  Put put3 = new Put(row3);
  put3.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);

  region.put(put1);
  region.put(put2);
  region.put(put3);
  region.flush(true);

  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  scan.setReversed(true);
  scan.setFilter(new FirstKeyOnlyFilter());
  scan.addFamily(cfName);
  RegionScanner scanner = region.getScanner(scan);
  List<Cell> res = new ArrayList<>();
  int count = 1;
  while (scanner.next(res)) {
    count++;
  }
  assertEquals("b", Bytes.toString(res.get(0).getRowArray(), res.get(0).getRowOffset(),
      res.get(0).getRowLength()));
  assertEquals("ab", Bytes.toString(res.get(1).getRowArray(), res.get(1).getRowOffset(),
      res.get(1).getRowLength()));
  assertEquals("a", Bytes.toString(res.get(2).getRowArray(), res.get(2).getRowOffset(),
      res.get(2).getRowLength()));
  assertEquals(3, count);
}
Example 4
Source File: SimpleHBaseStore.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
Reader() throws IOException {
  Connection conn = HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl());
  table = conn.getTable(htableName);

  Scan scan = new Scan();
  scan.addFamily(CF_B);
  scan.setCaching(1024);
  scan.setCacheBlocks(true);
  scanner = table.getScanner(scan);
}
Example 5
Source File: PrepareClusterJob.java From recsys-offline with Apache License 2.0 | 5 votes |
public void run() {
  try {
    Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
    job.setJarByClass(PrepareClusterJob.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_yearrate.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_repaylimittime.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_progress.getBytes());

    Filter filter = new SingleColumnValueFilter(Bytes.toBytes(Constants.hbase_column_family),
        Bytes.toBytes(Constants.hbase_column_progress), CompareOp.NOT_EQUAL,
        Bytes.toBytes("100"));
    scan.setFilter(filter);

    TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table, scan,
        HBaseReadMapper.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob(Constants.hbase_cluster_model_table,
        HBaseWriteReducer.class, job);

    job.setNumReduceTasks(1);

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example 6
Source File: CubeSegmentTupleIterator.java From Kylin with Apache License 2.0 | 5 votes |
private Scan buildScan(HBaseKeyRange keyRange) {
  Scan scan = new Scan();
  scan.setCaching(SCAN_CACHE);
  scan.setCacheBlocks(true);
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));
  for (RowValueDecoder valueDecoder : this.rowValueDecoders) {
    HBaseColumnDesc hbaseColumn = valueDecoder.getHBaseColumn();
    byte[] byteFamily = Bytes.toBytes(hbaseColumn.getColumnFamilyName());
    byte[] byteQualifier = Bytes.toBytes(hbaseColumn.getQualifier());
    scan.addColumn(byteFamily, byteQualifier);
  }
  scan.setStartRow(keyRange.getStartKey());
  scan.setStopRow(keyRange.getStopKey());
  return scan;
}
Example 7
Source File: HBaseLogReader.java From eagle with Apache License 2.0 | 5 votes |
/**
 * TODO If the required field is null for a row, then this row will not be fetched. That could be a
 * problem for counting. Need another version of read to strictly get the number of rows, which will
 * return all the columns for a column family.
 */
@Override
public void open() throws IOException {
  if (isOpen) {
    return; // silently return
  }
  try {
    tbl = EagleConfigFactory.load().getHTable(schema.getTable());
  } catch (RuntimeException ex) {
    throw new IOException(ex);
  }

  String rowkeyRegex = buildRegex2(searchTags);
  RegexStringComparator regexStringComparator = new RegexStringComparator(rowkeyRegex);
  regexStringComparator.setCharset(Charset.forName("ISO-8859-1"));
  RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator);
  FilterList filterList = new FilterList();
  filterList.addFilter(filter);
  Scan s1 = new Scan();
  // reverse timestamp, startRow is stopKey, and stopRow is startKey
  s1.setStartRow(stopKey);
  s1.setStopRow(startKey);
  s1.setFilter(filterList);
  // TODO the # of cached rows should be minimum of (pagesize and 100)
  s1.setCaching(100);
  // TODO not optimized for all applications
  s1.setCacheBlocks(true);
  // scan specified columnfamily and qualifiers
  for (byte[] qualifier : qualifiers) {
    s1.addColumn(schema.getColumnFamily().getBytes(), qualifier);
  }
  rs = tbl.getScanner(s1);
  isOpen = true;
}
Example 8
Source File: MetaDataClient.java From phoenix with Apache License 2.0 | 4 votes |
private long updateStatisticsInternal(PName physicalName, PTable logicalTable) throws SQLException {
  ReadOnlyProps props = connection.getQueryServices().getProps();
  final long msMinBetweenUpdates = props
      .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB,
          props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB,
              QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2);
  byte[] tenantIdBytes = ByteUtil.EMPTY_BYTE_ARRAY;
  Long scn = connection.getSCN();
  // Always invalidate the cache
  long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP : scn;
  String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM "
      + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + " WHERE " + PHYSICAL_NAME + "='"
      + physicalName.getString() + "' AND " + COLUMN_FAMILY + " IS NULL AND " + REGION_NAME
      + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL";
  ResultSet rs = connection.createStatement().executeQuery(query);
  long msSinceLastUpdate = Long.MAX_VALUE;
  if (rs.next()) {
    msSinceLastUpdate = rs.getLong(1) - rs.getLong(2);
  }
  long rowCount = 0;
  if (msSinceLastUpdate >= msMinBetweenUpdates) {
    /*
     * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable passed
     * through, since it may not represent a "real" table in the case of the view indexes of a
     * base table.
     */
    PostDDLCompiler compiler = new PostDDLCompiler(connection);
    TableRef tableRef = new TableRef(null, logicalTable, clientTimeStamp, false);
    MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, null, null,
        clientTimeStamp);
    Scan scan = plan.getContext().getScan();
    scan.setCacheBlocks(false);
    scan.setAttribute(BaseScannerRegionObserver.ANALYZE_TABLE, PDataType.TRUE_BYTES);
    MutationState mutationState = plan.execute();
    rowCount = mutationState.getUpdateCount();
  }
  /*
   * Update the stats table so that client will pull the new one with the updated stats.
   * Even if we don't run the command due to the last update time, invalidate the cache.
   * This supports scenarios in which a major compaction was manually initiated and the
   * client wants the modified stats to be reflected immediately.
   */
  connection.getQueryServices().clearTableFromCache(tenantIdBytes,
      Bytes.toBytes(SchemaUtil.getSchemaNameFromFullName(physicalName.getString())),
      Bytes.toBytes(SchemaUtil.getTableNameFromFullName(physicalName.getString())),
      clientTimeStamp);
  return rowCount;
}
Example 9
Source File: ThriftHBaseServiceHandler.java From hbase with Apache License 2.0 | 4 votes |
@Override
public int scannerOpenWithScan(ByteBuffer tableName, TScan tScan,
    Map<ByteBuffer, ByteBuffer> attributes) throws IOError {
  Table table = null;
  try {
    table = getTable(tableName);
    Scan scan = new Scan();
    addAttributes(scan, attributes);
    if (tScan.isSetStartRow()) {
      scan.withStartRow(tScan.getStartRow());
    }
    if (tScan.isSetStopRow()) {
      scan.withStopRow(tScan.getStopRow());
    }
    if (tScan.isSetTimestamp()) {
      scan.setTimeRange(0, tScan.getTimestamp());
    }
    if (tScan.isSetCaching()) {
      scan.setCaching(tScan.getCaching());
    }
    if (tScan.isSetBatchSize()) {
      scan.setBatch(tScan.getBatchSize());
    }
    if (tScan.isSetColumns() && !tScan.getColumns().isEmpty()) {
      for (ByteBuffer column : tScan.getColumns()) {
        byte[][] famQf = CellUtil.parseColumn(getBytes(column));
        if (famQf.length == 1) {
          scan.addFamily(famQf[0]);
        } else {
          scan.addColumn(famQf[0], famQf[1]);
        }
      }
    }
    if (tScan.isSetFilterString()) {
      ParseFilter parseFilter = new ParseFilter();
      scan.setFilter(parseFilter.parseFilterString(tScan.getFilterString()));
    }
    if (tScan.isSetReversed()) {
      scan.setReversed(tScan.isReversed());
    }
    if (tScan.isSetCacheBlocks()) {
      scan.setCacheBlocks(tScan.isCacheBlocks());
    }
    return addScanner(table.getScanner(scan), tScan.sortColumns);
  } catch (IOException e) {
    LOG.warn(e.getMessage(), e);
    throw getIOError(e);
  } finally {
    closeTable(table);
  }
}
Example 10
Source File: ExportHBaseTableToAvro.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToAvro.class);
  job.setJobName("ExportHBaseTableToAvro ");

  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);

  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();
  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 11
Source File: HBaseStorage.java From spork with Apache License 2.0 | 4 votes |
private void initScan() throws IOException {
  scan = new Scan();

  scan.setCacheBlocks(cacheBlocks_);
  scan.setCaching(caching_);

  // Set filters, if any.
  if (configuredOptions_.hasOption("gt")) {
    gt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gt")));
    addRowFilter(CompareOp.GREATER, gt_);
    scan.setStartRow(gt_);
  }
  if (configuredOptions_.hasOption("lt")) {
    lt_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lt")));
    addRowFilter(CompareOp.LESS, lt_);
    scan.setStopRow(lt_);
  }
  if (configuredOptions_.hasOption("gte")) {
    gte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("gte")));
    scan.setStartRow(gte_);
  }
  if (configuredOptions_.hasOption("lte")) {
    lte_ = Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue("lte")));
    byte[] lt = increment(lte_);
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Incrementing lte value of %s from bytes %s to %s to set stop row",
          Bytes.toString(lte_), toString(lte_), toString(lt)));
    }
    if (lt != null) {
      scan.setStopRow(increment(lte_));
    }
    // The WhileMatchFilter will short-circuit the scan after we no longer match. The
    // setStopRow call will limit the number of regions we need to scan
    addFilter(new WhileMatchFilter(new RowFilter(CompareOp.LESS_OR_EQUAL,
        new BinaryComparator(lte_))));
  }
  if (configuredOptions_.hasOption("regex")) {
    regex_ = Utils.slashisize(configuredOptions_.getOptionValue("regex"));
    addFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regex_)));
  }
  if (configuredOptions_.hasOption("minTimestamp") || configuredOptions_.hasOption("maxTimestamp")) {
    scan.setTimeRange(minTimestamp_, maxTimestamp_);
  }
  if (configuredOptions_.hasOption("timestamp")) {
    scan.setTimeStamp(timestamp_);
  }

  // if the group of columnInfos for this family doesn't contain a prefix, we don't need
  // to set any filters, we can just call addColumn or addFamily. See javadocs below.
  boolean columnPrefixExists = false;
  for (ColumnInfo columnInfo : columnInfo_) {
    if (columnInfo.getColumnPrefix() != null) {
      columnPrefixExists = true;
      break;
    }
  }

  if (!columnPrefixExists) {
    addFiltersWithoutColumnPrefix(columnInfo_);
  } else {
    addFiltersWithColumnPrefix(columnInfo_);
  }
}
Example 12
Source File: JobHistoryService.java From hraven with Apache License 2.0 | 4 votes |
/**
 * Removes the job's row from the job_history table, and all related task rows
 * from the job_history_task table.
 * @param key the job to be removed
 * @return the number of rows deleted.
 * @throws IOException
 */
public int removeJob(JobKey key) throws IOException {
  byte[] jobRow = jobKeyConv.toBytes(key);

  Table historyTable = hbaseConnection.getTable(TableName.valueOf(Constants.HISTORY_TABLE));
  historyTable.delete(new Delete(jobRow));
  historyTable.close();

  int deleteCount = 1;

  // delete all task rows
  Scan taskScan = getTaskScan(key);
  // only need the row keys back to delete (all should have taskid)
  taskScan.addColumn(Constants.INFO_FAM_BYTES,
      JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.TASKID));
  // no reason to cache rows we're deleting
  taskScan.setCacheBlocks(false);

  List<Delete> taskDeletes = new ArrayList<Delete>();
  Table taskTable = hbaseConnection.getTable(TableName.valueOf(Constants.HISTORY_TASK_TABLE));
  ResultScanner scanner = taskTable.getScanner(taskScan);
  try {
    for (Result r : scanner) {
      if (r != null && !r.isEmpty()) {
        byte[] rowKey = r.getRow();
        TaskKey taskKey = taskKeyConv.fromBytes(rowKey);
        if (!key.equals(taskKey)) {
          LOG.warn("Found task not in the current job " + Bytes.toStringBinary(rowKey));
          break;
        }
        taskDeletes.add(new Delete(r.getRow()));
      }
    }
    // Hang on the count because delete will modify our list.
    deleteCount += taskDeletes.size();
    if (taskDeletes.size() > 0) {
      LOG.info("Deleting " + taskDeletes.size() + " tasks for job " + key);
      taskTable.delete(taskDeletes);
    }
  } finally {
    scanner.close();
    taskTable.close();
  }
  return deleteCount;
}
Example 13
Source File: ExportHBaseTableToParquet.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];
  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();
  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 14
Source File: AggregationEndpoint.java From geowave with Apache License 2.0 | 4 votes |
private Object getValue(
    final Aggregation aggregation,
    final Filter filter,
    final DataTypeAdapter dataAdapter,
    final Short internalAdapterId,
    final HBaseDistributableFilter hdFilter,
    final boolean blockCaching,
    final int scanCacheSize,
    final String[] authorizations) throws IOException {
  final Scan scan = new Scan();
  scan.setMaxVersions(1);
  scan.setCacheBlocks(blockCaching);

  if (scanCacheSize != HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING) {
    scan.setCaching(scanCacheSize);
  }

  if (filter != null) {
    scan.setFilter(filter);
  }

  if (internalAdapterId != null) {
    scan.addFamily(StringUtils.stringToBinary(ByteArrayUtils.shortToString(internalAdapterId)));
  }

  if (authorizations != null) {
    scan.setAuthorizations(new Authorizations(authorizations));
  }
  env.getRegion().getCoprocessorHost().preScannerOpen(scan);

  try (InternalScanner scanner = env.getRegion().getScanner(scan)) {
    final List<Cell> results = new ArrayList<>();
    boolean hasNext;
    do {
      hasNext = scanner.next(results);
      if (!results.isEmpty()) {
        if (hdFilter != null) {
          if (dataAdapter != null) {
            final Object row = hdFilter.decodeRow(dataAdapter);
            if (row != null) {
              aggregation.aggregate(row);
            } else {
              LOGGER.error("DataAdapter failed to decode row");
            }
          } else {
            aggregation.aggregate(hdFilter.getPersistenceEncoding());
          }
        } else {
          aggregation.aggregate(null);
        }
        results.clear();
      }
    } while (hasNext);
  }
  return aggregation.getResult();
}
Example 15
Source File: TableInputFormat.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Sets up a {@link Scan} instance, applying settings from the configuration property
 * constants defined in {@code TableInputFormat}. This allows specifying things such as:
 * <ul>
 *   <li>start and stop rows</li>
 *   <li>column qualifiers or families</li>
 *   <li>timestamps or timerange</li>
 *   <li>scanner caching and batch size</li>
 * </ul>
 */
public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
  Scan scan = new Scan();

  if (conf.get(SCAN_ROW_START) != null) {
    scan.withStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
  }

  if (conf.get(SCAN_ROW_STOP) != null) {
    scan.withStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
  }

  if (conf.get(SCAN_COLUMNS) != null) {
    addColumns(scan, conf.get(SCAN_COLUMNS));
  }

  for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
    scan.addFamily(Bytes.toBytes(columnFamily));
  }

  if (conf.get(SCAN_TIMESTAMP) != null) {
    scan.setTimestamp(Long.parseLong(conf.get(SCAN_TIMESTAMP)));
  }

  if (conf.get(SCAN_TIMERANGE_START) != null && conf.get(SCAN_TIMERANGE_END) != null) {
    scan.setTimeRange(
        Long.parseLong(conf.get(SCAN_TIMERANGE_START)),
        Long.parseLong(conf.get(SCAN_TIMERANGE_END)));
  }

  if (conf.get(SCAN_MAXVERSIONS) != null) {
    scan.readVersions(Integer.parseInt(conf.get(SCAN_MAXVERSIONS)));
  }

  if (conf.get(SCAN_CACHEDROWS) != null) {
    scan.setCaching(Integer.parseInt(conf.get(SCAN_CACHEDROWS)));
  }

  if (conf.get(SCAN_BATCHSIZE) != null) {
    scan.setBatch(Integer.parseInt(conf.get(SCAN_BATCHSIZE)));
  }

  // false by default, full table scans generate too much BC churn
  scan.setCacheBlocks((conf.getBoolean(SCAN_CACHEBLOCKS, false)));
  return scan;
}
Example 16
Source File: MobRefReporter.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Main method for the tool.
 * @return 0 if success, 1 for bad args, 2 if job aborted with an exception,
 *         3 if mr job was unsuccessful
 */
public int run(String[] args) throws IOException, InterruptedException {
  // TODO make family and table optional
  if (args.length != 3) {
    printUsage();
    return 1;
  }
  final String output = args[0];
  final String tableName = args[1];
  final String familyName = args[2];
  final long reportStartTime = EnvironmentEdgeManager.currentTime();
  Configuration conf = getConf();
  try {
    FileSystem fs = FileSystem.get(conf);
    // check whether the current user is the same one with the owner of hbase root
    String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
    FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
    if (hbaseRootFileStat.length > 0) {
      String owner = hbaseRootFileStat[0].getOwner();
      if (!owner.equals(currentUserName)) {
        String errorMsg = "The current user[" + currentUserName
            + "] does not have hbase root credentials."
            + " If this job fails due to an inability to read HBase's internal directories, "
            + "you will need to rerun as a user with sufficient permissions. The HBase superuser "
            + "is a safe choice.";
        LOG.warn(errorMsg);
      }
    } else {
      LOG.error("The passed configs point to an HBase dir does not exist: {}",
          conf.get(HConstants.HBASE_DIR));
      throw new IOException("The target HBase does not exist");
    }

    byte[] family;
    int maxVersions;
    TableName tn = TableName.valueOf(tableName);
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin()) {
      TableDescriptor htd = admin.getDescriptor(tn);
      ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
      if (hcd == null || !hcd.isMobEnabled()) {
        throw new IOException("Column family " + familyName + " is not a MOB column family");
      }
      family = hcd.getName();
      maxVersions = hcd.getMaxVersions();
    }

    String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
    Job job = null;
    Scan scan = new Scan();
    scan.addFamily(family);
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    // If a scanner caching value isn't set, pick a smaller default since we know we're doing
    // a full table scan and don't want to impact other clients badly.
    scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
    scan.setCacheBlocks(false);
    scan.readVersions(maxVersions);
    conf.set(REPORT_JOB_ID, id);

    job = Job.getInstance(conf);
    job.setJarByClass(getClass());
    TableMapReduceUtil.initTableMapperJob(tn, scan, MobRefMapper.class, Text.class,
        ImmutableBytesWritable.class, job);

    job.setReducerClass(MobRefReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(output));

    job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
    // for use in the reducer. easier than re-parsing it out of the scan string.
    job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);

    // Use when we start this job as the base point for file "recency".
    job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);

    if (job.waitForCompletion(true)) {
      LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
    } else {
      System.err.println("Job was not successful");
      return 3;
    }
    return 0;
  } catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
    System.err.println("Job aborted due to exception " + e);
    return 2; // job failed
  }
}
Example 17
Source File: HBase_1_1_2_ClientService.java From nifi with Apache License 2.0 | 4 votes |
protected ResultScanner getResults(final Table table, final String startRow, final String endRow,
    final String filterExpression, final Long timerangeMin, final Long timerangeMax,
    final Integer limitRows, final Boolean isReversed, final Boolean blockCache,
    final Collection<Column> columns, List<String> authorizations) throws IOException {
  final Scan scan = new Scan();
  if (!StringUtils.isBlank(startRow)) {
    scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
  }
  if (!StringUtils.isBlank(endRow)) {
    scan.setStopRow(endRow.getBytes(StandardCharsets.UTF_8));
  }

  if (authorizations != null && authorizations.size() > 0) {
    scan.setAuthorizations(new Authorizations(authorizations));
  }

  Filter filter = null;
  if (columns != null) {
    for (Column col : columns) {
      if (col.getQualifier() == null) {
        scan.addFamily(col.getFamily());
      } else {
        scan.addColumn(col.getFamily(), col.getQualifier());
      }
    }
  }

  if (!StringUtils.isBlank(filterExpression)) {
    ParseFilter parseFilter = new ParseFilter();
    filter = parseFilter.parseFilterString(filterExpression);
  }

  if (filter != null) {
    scan.setFilter(filter);
  }

  if (timerangeMin != null && timerangeMax != null) {
    scan.setTimeRange(timerangeMin, timerangeMax);
  }

  // ->>> reserved for HBase v 2 or later
  //if (limitRows != null && limitRows > 0){
  //  scan.setLimit(limitRows)
  //}

  if (isReversed != null) {
    scan.setReversed(isReversed);
  }

  scan.setCacheBlocks(blockCache);

  return table.getScanner(scan);
}
Example 18
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0 | 4 votes |
public int run(Path outputDir, int numReducers) throws Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

  job = Job.getInstance(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  if (isMultiUnevenColumnFamilies(getConf())) {
    scan.addColumn(BIG_FAMILY_NAME, BIG_FAMILY_NAME);
    scan.addColumn(TINY_FAMILY_NAME, TINY_FAMILY_NAME);
  }

  TableMapReduceUtil.initTableMapperJob(getTableName(getConf()).getName(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  boolean success = job.waitForCompletion(true);

  if (success) {
    Counters counters = job.getCounters();
    if (null == counters) {
      LOG.warn("Counters were null, cannot verify Job completion."
          + " This is commonly a result of insufficient YARN configuration.");
      // We don't have access to the counters to know if we have "bad" counts
      return 0;
    }

    // If we find no unexpected values, the job didn't outright fail
    if (verifyUnexpectedValues(counters)) {
      // We didn't check referenced+unreferenced counts, leave that to visual inspection
      return 0;
    }
  }

  // We failed
  return 1;
}
Example 19
Source File: IntegrationTestBulkLoad.java From hbase with Apache License 2.0 | 4 votes |
/**
 * After adding data to the table start a mr job to
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void runCheck() throws IOException, ClassNotFoundException, InterruptedException {
  LOG.info("Running check");
  Configuration conf = getConf();
  String jobName = getTablename() + "_check" + EnvironmentEdgeManager.currentTime();
  Path p = util.getDataTestDirOnTestFS(jobName);

  Job job = new Job(conf);
  job.setJarByClass(getClass());
  job.setJobName(jobName);

  job.setPartitionerClass(NaturalKeyPartitioner.class);
  job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
  job.setSortComparatorClass(CompositeKeyComparator.class);

  Scan scan = new Scan();
  scan.addFamily(CHAIN_FAM);
  scan.addFamily(SORT_FAM);
  scan.readVersions(1);
  scan.setCacheBlocks(false);
  scan.setBatch(1000);

  int replicaCount = conf.getInt(NUM_REPLICA_COUNT_KEY, NUM_REPLICA_COUNT_DEFAULT);
  if (replicaCount != NUM_REPLICA_COUNT_DEFAULT) {
    scan.setConsistency(Consistency.TIMELINE);
  }

  TableMapReduceUtil.initTableMapperJob(
      getTablename().getName(),
      scan,
      LinkedListCheckingMapper.class,
      LinkKey.class,
      LinkChain.class,
      job
  );

  job.setReducerClass(LinkedListCheckingReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, p);

  assertEquals(true, job.waitForCompletion(true));

  // Delete the files.
  util.getTestFileSystem().delete(p, true);
}
Example 20
Source File: RowCounter.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 * @deprecated as of release 2.3.0. Will be removed on 4.0.0. Please use main method instead.
 */
@Deprecated
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  List<MultiRowRangeFilter.RowRange> rowRangeList = null;
  long startTime = 0;
  long endTime = 0;

  StringBuilder sb = new StringBuilder();

  final String rangeSwitch = "--range=";
  final String startTimeArgKey = "--starttime=";
  final String endTimeArgKey = "--endtime=";
  final String expectedCountArg = "--expected-count=";

  // First argument is table name, starting from second
  for (int i = 1; i < args.length; i++) {
    if (args[i].startsWith(rangeSwitch)) {
      try {
        rowRangeList = parseRowRangeParameter(
            args[i].substring(args[1].indexOf(rangeSwitch) + rangeSwitch.length()));
      } catch (IllegalArgumentException e) {
        return null;
      }
      continue;
    }
    if (args[i].startsWith(startTimeArgKey)) {
      startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
      continue;
    }
    if (args[i].startsWith(endTimeArgKey)) {
      endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
      continue;
    }
    if (args[i].startsWith(expectedCountArg)) {
      conf.setLong(EXPECTED_COUNT_KEY,
          Long.parseLong(args[i].substring(expectedCountArg.length())));
      continue;
    }
    // if no switch, assume column names
    sb.append(args[i]);
    sb.append(" ");
  }
  if (endTime < startTime) {
    printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
    return null;
  }

  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(RowCounter.class);
  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  setScanFilter(scan, rowRangeList);

  if (sb.length() > 0) {
    for (String columnName : sb.toString().trim().split(" ")) {
      String family = StringUtils.substringBefore(columnName, ":");
      String qualifier = StringUtils.substringAfter(columnName, ":");
      if (StringUtils.isBlank(qualifier)) {
        scan.addFamily(Bytes.toBytes(family));
      } else {
        scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
      }
    }
  }

  scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}