org.apache.hadoop.hbase.filter.RegexStringComparator Java Examples
The following examples show how to use
org.apache.hadoop.hbase.filter.RegexStringComparator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ScannerModel.java From hbase with Apache License 2.0 | 6 votes |
/**
 * Builds the model from a comparator instance.
 *
 * <p>The comparator's simple class name is stored as the type and is also used to look up the
 * matching {@code ComparatorType} constant. Binary, binary-prefix and bit comparators store
 * their value Base64-encoded; regex and substring comparators store it as a plain string; the
 * null comparator stores no value. Bit comparators additionally record their operator.
 *
 * @param comparator the comparator to describe
 * @throws RuntimeException if the comparator type has no case in the switch below
 */
public ByteArrayComparableModel(ByteArrayComparable comparator) {
  final String simpleName = comparator.getClass().getSimpleName();
  final ComparatorType comparatorType = ComparatorType.valueOf(simpleName);
  this.type = simpleName;

  switch (comparatorType) {
    case BinaryComparator:
    case BinaryPrefixComparator: {
      byte[] encoded = Base64.getEncoder().encode(comparator.getValue());
      this.value = Bytes.toString(encoded);
      break;
    }
    case BitComparator: {
      byte[] encoded = Base64.getEncoder().encode(comparator.getValue());
      this.value = Bytes.toString(encoded);
      BitComparator bitComparator = (BitComparator) comparator;
      this.op = bitComparator.getOperator().toString();
      break;
    }
    case NullComparator:
      // nothing to record besides the type
      break;
    case RegexStringComparator:
    case SubstringComparator:
      this.value = Bytes.toString(comparator.getValue());
      break;
    default:
      throw new RuntimeException("unhandled filter type: " + comparatorType);
  }
}
Example #2
Source File: TestTableInputFormat.java From hbase with Apache License 2.0 | 6 votes |
@Override protected void initialize(JobContext job) throws IOException { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create( job.getConfiguration())); TableName tableName = TableName.valueOf("exampleTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); }
Example #3
Source File: TestTableInputFormat.java From hbase with Apache License 2.0 | 6 votes |
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); TableName tableName = TableName.valueOf("exampleJobConfigurableTable"); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; //optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to initialize.", exception); } }
Example #4
Source File: TestTableInputFormat.java From hbase with Apache License 2.0 | 6 votes |
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(job); Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable"))); // mandatory initializeTable(connection, exampleTable.getName()); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // optional Scan scan = new Scan(); for (byte[] family : inputColumns) { scan.addFamily(family); } Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); scan.setFilter(exampleFilter); setScan(scan); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
Example #5
Source File: TestTableInputFormat.java From hbase with Apache License 2.0 | 6 votes |
@Override public void configure(JobConf job) { try { Connection connection = ConnectionFactory.createConnection(job); Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable")); // mandatory initializeTable(connection, exampleTable.getName()); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); } catch (IOException exception) { throw new RuntimeException("Failed to configure for job.", exception); } }
Example #6
Source File: PermissionStorage.java From hbase with Apache License 2.0 | 5 votes |
/**
 * Deletes, from the row keyed by {@code tableName}, every qualifier in {@code ACL_LIST_FAMILY}
 * that refers to the given column.
 *
 * <p>A qualifier is selected when it contains the column name surrounded by
 * {@code ACL_KEY_DELIMITER} on both sides, or ends with the delimiter followed by the column
 * name. The column name is regex-quoted so that metacharacters in it are matched literally
 * and cannot select (and delete) entries belonging to other columns.
 *
 * @param tableName  table whose name is used as the row key of the delete
 * @param column     column whose permission entries are removed
 * @param table      table that is scanned and deleted from
 * @param closeTable whether {@code table} is closed before returning
 * @throws IOException propagated from the scan, the delete, or closing the table
 */
static private void removeTablePermissions(TableName tableName, byte[] column, Table table,
    boolean closeTable) throws IOException {
  // String.valueOf keeps the previous "%s" rendering ("null") for a null column name instead
  // of letting Pattern.quote fail on it.
  String quotedColumn = java.util.regex.Pattern.quote(String.valueOf(Bytes.toString(column)));

  Scan scan = new Scan();
  scan.addFamily(ACL_LIST_FAMILY);
  scan.setFilter(new QualifierFilter(CompareOperator.EQUAL,
      new RegexStringComparator(String.format("(%s%s%s)|(%s%s)$",
          ACL_KEY_DELIMITER, quotedColumn, ACL_KEY_DELIMITER,
          ACL_KEY_DELIMITER, quotedColumn))));

  Set<byte[]> qualifierSet = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  try {
    // The scanner is closed as soon as the qualifiers are collected; a failure while closing
    // it can no longer prevent the table from being closed in the finally block.
    try (ResultScanner scanner = table.getScanner(scan)) {
      for (Result res : scanner) {
        qualifierSet.addAll(res.getFamilyMap(ACL_LIST_FAMILY).navigableKeySet());
      }
    }

    if (!qualifierSet.isEmpty()) {
      Delete d = new Delete(tableName.getName());
      for (byte[] qualifier : qualifierSet) {
        d.addColumns(ACL_LIST_FAMILY, qualifier);
      }
      table.delete(d);
    }
  } finally {
    if (closeTable) {
      table.close();
    }
  }
}
Example #7
Source File: TestTableInputFormat.java From hbase with Apache License 2.0 | 5 votes |
protected void initialize(JobConf job, String table) throws IOException { Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); TableName tableName = TableName.valueOf(table); // mandatory initializeTable(connection, tableName); byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }; // mandatory setInputColumns(inputColumns); Filter exampleFilter = new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); // optional setRowFilter(exampleFilter); }
Example #8
Source File: TestFromClientSide.java From hbase with Apache License 2.0 | 5 votes |
/**
 * Writes ten rows, each with one distinct {@code colN-...} qualifier, then scans with a
 * qualifier regex of {@code col[1-5]} and checks that exactly rows 1 through 5 come back, in
 * order, with one cell each.
 */
@Test
public void testFilters() throws Exception {
  final TableName tableName = name.getTableName();
  try (Table ht = TEST_UTIL.createTable(tableName, FAMILY)) {
    byte[][] rows = makeN(ROW, 10);
    byte[][] qualifiers = {
      Bytes.toBytes("col0-<d2v1>-<d3v2>"),
      Bytes.toBytes("col1-<d2v1>-<d3v2>"),
      Bytes.toBytes("col2-<d2v1>-<d3v2>"),
      Bytes.toBytes("col3-<d2v1>-<d3v2>"),
      Bytes.toBytes("col4-<d2v1>-<d3v2>"),
      Bytes.toBytes("col5-<d2v1>-<d3v2>"),
      Bytes.toBytes("col6-<d2v1>-<d3v2>"),
      Bytes.toBytes("col7-<d2v1>-<d3v2>"),
      Bytes.toBytes("col8-<d2v1>-<d3v2>"),
      Bytes.toBytes("col9-<d2v1>-<d3v2>")
    };

    // One put per row, each with its own qualifier.
    for (int rowIndex = 0; rowIndex < 10; rowIndex++) {
      Put put = new Put(rows[rowIndex]);
      put.setDurability(Durability.SKIP_WAL);
      put.addColumn(FAMILY, qualifiers[rowIndex], VALUE);
      ht.put(put);
    }

    Scan scan = new Scan();
    scan.addFamily(FAMILY);
    scan.setFilter(
        new QualifierFilter(CompareOperator.EQUAL, new RegexStringComparator("col[1-5]")));

    try (ResultScanner scanner = ht.getScanner(scan)) {
      // The first match is expected at index 1; the index after the last match must be 6.
      int nextExpected = 1;
      for (Result result : scanner) {
        assertEquals(1, result.size());
        byte[] actualRow = CellUtil.cloneRow(result.rawCells()[0]);
        assertTrue(Bytes.equals(actualRow, rows[nextExpected]));
        byte[] actualQualifier = CellUtil.cloneQualifier(result.rawCells()[0]);
        assertTrue(Bytes.equals(actualQualifier, qualifiers[nextExpected]));
        nextExpected++;
      }
      assertEquals(6, nextExpected);
    }
  }
}
Example #9
Source File: Reads.java From java-docs-samples with Apache License 2.0 | 5 votes |
public static void readFilter(String projectId, String instanceId, String tableId) { // Initialize client that will be used to send requests. This client only needs to be created // once, and can be reused for multiple requests. After completing all of your requests, call // the "close" method on the client to safely clean up any remaining background resources. try (Connection connection = BigtableConfiguration.connect(projectId, instanceId)) { Table table = connection.getTable(TableName.valueOf(tableId)); ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, new RegexStringComparator("PQ2A.*")); Scan scan = new Scan().setFilter(valueFilter); ResultScanner rows = table.getScanner(scan); for (Result row : rows) { printRow(row); } } catch (IOException e) { System.out.println( "Unable to initialize service client, as a network error occurred: \n" + e.toString()); } }
Example #10
Source File: Filters.java From java-docs-samples with Apache License 2.0 | 5 votes |
public static void filterLimitValueRegex(String projectId, String instanceId, String tableId) { // A filter that matches cells whose value satisfies the given regex Filter filter = new ValueFilter(CompareOp.EQUAL, new RegexStringComparator("PQ2A.*$")); Scan scan = new Scan().setFilter(filter); readWithFilter(projectId, instanceId, tableId, scan); }
Example #11
Source File: Filters.java From java-docs-samples with Apache License 2.0 | 5 votes |
public static void filterLimitColQualifierRegex( String projectId, String instanceId, String tableId) { // A filter that matches cells whose column qualifier satisfies the given regex Filter filter = new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator("connected_.*$")); Scan scan = new Scan().setFilter(filter); readWithFilter(projectId, instanceId, tableId, scan); }
Example #12
Source File: Filters.java From java-docs-samples with Apache License 2.0 | 5 votes |
public static void filterLimitColFamilyRegex( String projectId, String instanceId, String tableId) { // A filter that matches cells whose column family satisfies the given regex Filter filter = new FamilyFilter(CompareOp.EQUAL, new RegexStringComparator("stats_.*$")); Scan scan = new Scan().setFilter(filter); readWithFilter(projectId, instanceId, tableId, scan); }
Example #13
Source File: HBaseIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests reading with a row filter: writes 1001 rows, reads them back through a row-key regex
 * of {@code .*17.*}, and hands the read, the flag {@code false} and the number 20 to
 * {@code runReadTestLength}.
 */
@Test
public void testReadingWithFilter() throws Exception {
  final int numRows = 1001;
  final String table = tmpTable.getName();
  createAndWriteData(table, numRows);

  RegexStringComparator rowKeyRegex = new RegexStringComparator(".*17.*");
  Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, rowKeyRegex);

  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(table).withFilter(filter),
      false,
      20);
}
Example #14
Source File: HBaseLogReader.java From eagle with Apache License 2.0 | 5 votes |
/** * TODO If the required field is null for a row, then this row will not be fetched. That could be a * problem for counting Need another version of read to strictly get the number of rows which will return * all the columns for a column family */ @Override public void open() throws IOException { if (isOpen) { return; // silently return } try { tbl = EagleConfigFactory.load().getHTable(schema.getTable()); } catch (RuntimeException ex) { throw new IOException(ex); } String rowkeyRegex = buildRegex2(searchTags); RegexStringComparator regexStringComparator = new RegexStringComparator(rowkeyRegex); regexStringComparator.setCharset(Charset.forName("ISO-8859-1")); RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator); FilterList filterList = new FilterList(); filterList.addFilter(filter); Scan s1 = new Scan(); // reverse timestamp, startRow is stopKey, and stopRow is startKey s1.setStartRow(stopKey); s1.setStopRow(startKey); s1.setFilter(filterList); // TODO the # of cached rows should be minimum of (pagesize and 100) s1.setCaching(100); // TODO not optimized for all applications s1.setCacheBlocks(true); // scan specified columnfamily and qualifiers for (byte[] qualifier : qualifiers) { s1.addColumn(schema.getColumnFamily().getBytes(), qualifier); } rs = tbl.getScanner(s1); isOpen = true; }
Example #15
Source File: HBaseLogReader.java From Eagle with Apache License 2.0 | 5 votes |
/** * TODO If the required field is null for a row, then this row will not be fetched. That could be a problem for counting * Need another version of read to strictly get the number of rows which will return all the columns for a column family */ public void open() throws IOException { if (isOpen) return; // silently return try { tbl = EagleConfigFactory.load().getHTable(schema.getTable()); } catch (RuntimeException ex) { throw new IOException(ex); } String rowkeyRegex = buildRegex2(searchTags); RegexStringComparator regexStringComparator = new RegexStringComparator( rowkeyRegex); regexStringComparator.setCharset(Charset.forName("ISO-8859-1")); RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator); FilterList filterList = new FilterList(); filterList.addFilter(filter); Scan s1 = new Scan(); // reverse timestamp, startRow is stopKey, and stopRow is startKey s1.setStartRow(stopKey); s1.setStopRow(startKey); s1.setFilter(filterList); // TODO the # of cached rows should be minimum of (pagesize and 100) s1.setCaching(100); // TODO not optimized for all applications s1.setCacheBlocks(true); // scan specified columnfamily and qualifiers for(byte[] qualifier : qualifiers){ s1.addColumn(schema.getColumnFamily().getBytes(), qualifier); } rs = tbl.getScanner(s1); isOpen = true; }
Example #16
Source File: HBaseIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Same scenario as the row-filter read test, but passes {@code true} as the flag to
 * {@code runReadTestLength}: writes 1001 rows and reads them back through a row-key regex of
 * {@code .*17.*}, with the number 20 as the last argument.
 */
@Test
public void testReadingWithFilterSDF() throws Exception {
  final int numRows = 1001;
  final String table = tmpTable.getName();
  createAndWriteData(table, numRows);

  RegexStringComparator rowKeyRegex = new RegexStringComparator(".*17.*");
  Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, rowKeyRegex);

  runReadTestLength(
      HBaseIO.read().withConfiguration(conf).withTableId(table).withFilter(filter),
      true,
      20);
}
Example #17
Source File: JobHistoryService.java From hraven with Apache License 2.0 | 4 votes |
/** * Returns the {@link Flow} runs' stats - summed up per flow If the * {@code version} parameter is non-null, the returned results will be * restricted to those matching this app version. * * <p> * <strong>Note:</strong> this retrieval method will omit the configuration * data from all of the returned jobs. * </p> * * @param cluster the cluster where the jobs were run * @param user the user running the jobs * @param appId the application identifier for the jobs * @param version if non-null, only flows matching this application version * will be returned * @param startTime the start time for the flows to be looked at * @param endTime the end time for the flows to be looked at * @param limit the maximum number of flows to return * @return */ public List<Flow> getFlowTimeSeriesStats(String cluster, String user, String appId, String version, long startTime, long endTime, int limit, byte[] startRow) throws IOException { // app portion of row key byte[] rowPrefix = Bytes.toBytes((cluster + Constants.SEP + user + Constants.SEP + appId + Constants.SEP)); byte[] scanStartRow; if (startRow != null) { scanStartRow = startRow; } else { if (endTime != 0) { // use end time in start row, if present long endRunId = FlowKey.encodeRunId(endTime); scanStartRow = Bytes.add(rowPrefix, Bytes.toBytes(endRunId), Constants.SEP_BYTES); } else { scanStartRow = rowPrefix; } } // TODO: use RunMatchFilter to limit scan on the server side Scan scan = new Scan(); scan.setStartRow(scanStartRow); FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL); if (startTime != 0) { // if limited by start time, early out as soon as we hit it long startRunId = FlowKey.encodeRunId(startTime); // zero byte at the end makes the startRunId inclusive byte[] scanEndRow = Bytes.add(rowPrefix, Bytes.toBytes(startRunId), Constants.ZERO_SINGLE_BYTE); scan.setStopRow(scanEndRow); } else { // require that all rows match the app prefix we're looking for filters.addFilter(new WhileMatchFilter(new 
PrefixFilter(rowPrefix))); } // if version is passed, restrict the rows returned to that version if (version != null && version.length() > 0) { filters.addFilter(new SingleColumnValueFilter(Constants.INFO_FAM_BYTES, Constants.VERSION_COLUMN_BYTES, CompareFilter.CompareOp.EQUAL, Bytes.toBytes(version))); } // filter out all config columns except the queue name filters.addFilter(new QualifierFilter(CompareFilter.CompareOp.NOT_EQUAL, new RegexStringComparator( "^c\\!((?!" + Constants.HRAVEN_QUEUE + ").)*$"))); scan.setFilter(filters); LOG.info("scan : \n " + scan.toJSON() + " \n"); return createFromResults(scan, false, limit); }
Example #18
Source File: HBaseStorage.java From spork with Apache License 2.0 | 4 votes |
/**
 * Creates the scan and configures it from the parsed options: caching settings, row-range
 * bounds ({@code gt}, {@code lt}, {@code gte}, {@code lte}), an optional row-key regex,
 * timestamp restrictions, and finally the column filters.
 *
 * @throws IOException declared by the signature; presumably raised by the filter helpers
 */
private void initScan() throws IOException {
  scan = new Scan();
  scan.setCacheBlocks(cacheBlocks_);
  scan.setCaching(caching_);

  // Set filters, if any.
  if (configuredOptions_.hasOption("gt")) {
    gt_ = readBinaryOption("gt");
    addRowFilter(CompareOp.GREATER, gt_);
    scan.setStartRow(gt_);
  }
  if (configuredOptions_.hasOption("lt")) {
    lt_ = readBinaryOption("lt");
    addRowFilter(CompareOp.LESS, lt_);
    scan.setStopRow(lt_);
  }
  if (configuredOptions_.hasOption("gte")) {
    gte_ = readBinaryOption("gte");
    scan.setStartRow(gte_);
  }
  if (configuredOptions_.hasOption("lte")) {
    lte_ = readBinaryOption("lte");
    byte[] incremented = increment(lte_);
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format(
          "Incrementing lte value of %s from bytes %s to %s to set stop row",
          Bytes.toString(lte_), toString(lte_), toString(incremented)));
    }
    if (incremented != null) {
      scan.setStopRow(increment(lte_));
    }

    // The WhileMatchFilter will short-circuit the scan after we no longer match. The
    // setStopRow call will limit the number of regions we need to scan
    RowFilter upperBound = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(lte_));
    addFilter(new WhileMatchFilter(upperBound));
  }
  if (configuredOptions_.hasOption("regex")) {
    regex_ = Utils.slashisize(configuredOptions_.getOptionValue("regex"));
    addFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regex_)));
  }
  if (configuredOptions_.hasOption("minTimestamp")
      || configuredOptions_.hasOption("maxTimestamp")) {
    scan.setTimeRange(minTimestamp_, maxTimestamp_);
  }
  if (configuredOptions_.hasOption("timestamp")) {
    scan.setTimeStamp(timestamp_);
  }

  // if the group of columnInfos for this family doesn't contain a prefix, we don't need
  // to set any filters, we can just call addColumn or addFamily. See javadocs below.
  boolean anyColumnPrefix = false;
  for (ColumnInfo info : columnInfo_) {
    if (info.getColumnPrefix() != null) {
      anyColumnPrefix = true;
      break;
    }
  }

  if (anyColumnPrefix) {
    addFiltersWithColumnPrefix(columnInfo_);
  } else {
    addFiltersWithoutColumnPrefix(columnInfo_);
  }
}

/** Reads the named option, passes it through {@code Utils.slashisize}, and decodes it to bytes. */
private byte[] readBinaryOption(String optionName) {
  return Bytes.toBytesBinary(Utils.slashisize(configuredOptions_.getOptionValue(optionName)));
}
Example #19
Source File: QuotaTableUtil.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Converts a {@code QuotaFilter} into a serializable filter list.
 *
 * <p>When a user filter is set, the result holds a MUST_PASS_ONE list containing one
 * user-row-plus-qualifier pair for the namespace filter and one for the table filter (each
 * only if set), or just the user row filter when neither is set. Otherwise the first
 * non-empty of the table, namespace and region-server filters contributes a single row
 * filter. If nothing is set the returned list is empty.
 *
 * @param filter the quota filter to convert
 * @return a MUST_PASS_ALL filter list built as described above
 */
public static Filter makeFilter(final QuotaFilter filter) {
  final FilterList result = new FilterList(FilterList.Operator.MUST_PASS_ALL);

  if (StringUtils.isNotEmpty(filter.getUserFilter())) {
    final FilterList userFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE);
    final boolean byNamespace = StringUtils.isNotEmpty(filter.getNamespaceFilter());
    final boolean byTable = StringUtils.isNotEmpty(filter.getTableFilter());

    if (byNamespace) {
      FilterList nsFilters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      String userRowRegex = getUserRowKeyRegex(filter.getUserFilter());
      nsFilters.addFilter(
          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(userRowRegex, 0)));
      String nsQualifierRegex =
          getSettingsQualifierRegexForUserNamespace(filter.getNamespaceFilter());
      nsFilters.addFilter(new QualifierFilter(CompareOperator.EQUAL,
          new RegexStringComparator(nsQualifierRegex, 0)));
      userFilters.addFilter(nsFilters);
    }

    if (byTable) {
      FilterList tableFilters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      String userRowRegex = getUserRowKeyRegex(filter.getUserFilter());
      tableFilters.addFilter(
          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(userRowRegex, 0)));
      String tableQualifierRegex =
          getSettingsQualifierRegexForUserTable(filter.getTableFilter());
      tableFilters.addFilter(new QualifierFilter(CompareOperator.EQUAL,
          new RegexStringComparator(tableQualifierRegex, 0)));
      userFilters.addFilter(tableFilters);
    }

    // Neither a namespace nor a table narrows the user: match on the user row key alone.
    if (!byNamespace && !byTable) {
      String userRowRegex = getUserRowKeyRegex(filter.getUserFilter());
      userFilters.addFilter(
          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(userRowRegex, 0)));
    }

    result.addFilter(userFilters);
  } else if (StringUtils.isNotEmpty(filter.getTableFilter())) {
    String tableRowRegex = getTableRowKeyRegex(filter.getTableFilter());
    result.addFilter(
        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(tableRowRegex, 0)));
  } else if (StringUtils.isNotEmpty(filter.getNamespaceFilter())) {
    String namespaceRowRegex = getNamespaceRowKeyRegex(filter.getNamespaceFilter());
    result.addFilter(
        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(namespaceRowRegex, 0)));
  } else if (StringUtils.isNotEmpty(filter.getRegionServerFilter())) {
    String regionServerRowRegex = getRegionServerRowKeyRegex(filter.getRegionServerFilter());
    result.addFilter(new RowFilter(CompareOperator.EQUAL,
        new RegexStringComparator(regionServerRowRegex, 0)));
  }

  return result;
}
Example #20
Source File: TestFromClientSide5.java From hbase with Apache License 2.0 | 4 votes |
/**
 * Writes ten rows, each with one distinct {@code colN-...} qualifier, then runs a reversed
 * scan with a qualifier regex of {@code col[1-5]} and checks that rows 5 down to 1 come back,
 * in that order, with one cell each.
 */
@Test
public void testFiltersWithReverseScan() throws Exception {
  final TableName tableName = name.getTableName();
  try (Table ht = TEST_UTIL.createTable(tableName, FAMILY)) {
    byte[][] rows = makeN(ROW, 10);
    byte[][] qualifiers = {
      Bytes.toBytes("col0-<d2v1>-<d3v2>"),
      Bytes.toBytes("col1-<d2v1>-<d3v2>"),
      Bytes.toBytes("col2-<d2v1>-<d3v2>"),
      Bytes.toBytes("col3-<d2v1>-<d3v2>"),
      Bytes.toBytes("col4-<d2v1>-<d3v2>"),
      Bytes.toBytes("col5-<d2v1>-<d3v2>"),
      Bytes.toBytes("col6-<d2v1>-<d3v2>"),
      Bytes.toBytes("col7-<d2v1>-<d3v2>"),
      Bytes.toBytes("col8-<d2v1>-<d3v2>"),
      Bytes.toBytes("col9-<d2v1>-<d3v2>")
    };

    // One put per row, each with its own qualifier.
    for (int rowIndex = 0; rowIndex < 10; rowIndex++) {
      Put put = new Put(rows[rowIndex]);
      put.addColumn(FAMILY, qualifiers[rowIndex], VALUE);
      ht.put(put);
    }

    Scan scan = new Scan();
    scan.setReversed(true);
    scan.addFamily(FAMILY);
    scan.setFilter(
        new QualifierFilter(CompareOperator.EQUAL, new RegexStringComparator("col[1-5]")));

    try (ResultScanner scanner = ht.getScanner(scan)) {
      // Reversed scan: the first match is expected at index 5, counting down to 1.
      int nextExpected = 5;
      for (Result result : scanner) {
        assertEquals(1, result.size());
        Cell cell = result.rawCells()[0];
        byte[] wantedRow = rows[nextExpected];
        assertTrue(Bytes.equals(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
            wantedRow, 0, wantedRow.length));
        byte[] wantedQualifier = qualifiers[nextExpected];
        assertTrue(Bytes.equals(cell.getQualifierArray(), cell.getQualifierOffset(),
            cell.getQualifierLength(), wantedQualifier, 0, wantedQualifier.length));
        nextExpected--;
      }
      assertEquals(0, nextExpected);
    }
  }
}
Example #21
Source File: Filters.java From java-docs-samples with Apache License 2.0 | 4 votes |
public static void filterLimitRowRegex(String projectId, String instanceId, String tableId) { // A filter that matches cells from rows whose keys satisfy the given regex Filter filter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".*#20190501$")); Scan scan = new Scan().setFilter(filter).setMaxVersions(); readWithFilter(projectId, instanceId, tableId, scan); }