org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj Java Examples
The following examples show how to use
org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code DecimalColumnStatsData} with no fields set and checks that the result
 * carries decimal statistics with empty bounds while every other statistic is empty.
 */
@Test
public void testEmptyDecimalStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(new DecimalColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(
            converted.getDecimalStatistics(),
            Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty())));
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #2
Source File: HiveCatalog.java From flink with Apache License 2.0 | 6 votes |
@Override public CatalogColumnStatistics getTableColumnStatistics(ObjectPath tablePath) throws TableNotExistException, CatalogException { Table hiveTable = getHiveTable(tablePath); try { if (!isTablePartitioned(hiveTable)) { List<ColumnStatisticsObj> columnStatisticsObjs = client.getTableColumnStatistics( hiveTable.getDbName(), hiveTable.getTableName(), getFieldNames(hiveTable.getSd().getCols())); return new CatalogColumnStatistics(HiveStatsUtil.createCatalogColumnStats(columnStatisticsObjs)); } else { // TableColumnStats of partitioned table is unknown, the behavior is same as HIVE return CatalogColumnStatistics.UNKNOWN; } } catch (TException e) { throw new CatalogException(String.format("Failed to get table column stats of table %s", tablePath.getFullName()), e); } }
Example #3
Source File: HiveStatsUtil.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a Hive {@code ColumnStatistics} object for the columns of {@code sd}.
 *
 * <p>Columns that have no entry in {@code colStats} are skipped; for every other column a
 * {@code ColumnStatisticsObj} is created from the column name, the Hive type string and the
 * statistics data derived from the Flink statistics.
 *
 * @param colStats Flink column statistics, looked up by column name
 * @param sd storage descriptor supplying the columns to iterate over
 * @param desc statistics descriptor passed through to the result
 * @return statistics holding {@code desc} and one entry per column found in {@code colStats}
 */
private static ColumnStatistics createHiveColumnStatistics(
        Map<String, CatalogColumnStatisticsDataBase> colStats,
        StorageDescriptor sd,
        ColumnStatisticsDesc desc) {
    List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
    for (FieldSchema column : sd.getCols()) {
        String name = column.getName();
        String type = column.getType();
        CatalogColumnStatisticsDataBase flinkStats = colStats.get(column.getName());
        if (flinkStats == null) {
            continue;
        }
        ColumnStatisticsData data = getColumnStatisticsData(
                HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(type)),
                flinkStats);
        statsObjs.add(new ColumnStatisticsObj(name, type, data));
    }
    return new ColumnStatistics(desc, statsObjs);
}
Example #4
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code BinaryColumnStatsData} with no fields set and checks that every statistic
 * in the result is empty.
 */
@Test
public void testEmptyBinaryStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(new BinaryColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #5
Source File: PartitionedTableReplicationTest.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Replicates a partitioned table when the source returns no partitions and no statistics.
 *
 * <p>Expects that the copier is never touched and that, in order, the replica table is
 * validated and then its metadata is updated.
 */
@Test
public void noMatchingPartitions() throws Exception {
    when(replica.getLocationManager(TableType.PARTITIONED, targetTableLocation, EVENT_ID, sourceLocationManager))
            .thenReturn(replicaLocationManager);

    PartitionsAndStatistics noPartitions = new PartitionsAndStatistics(
            sourceTable.getPartitionKeys(),
            Collections.<Partition>emptyList(),
            Collections.<String, List<ColumnStatisticsObj>>emptyMap());
    when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(noPartitions);
    when(source.getLocationManager(sourceTable, Collections.<Partition>emptyList(), EVENT_ID, copierOptions))
            .thenReturn(sourceLocationManager);

    PartitionedTableReplication underTest = new PartitionedTableReplication(
            DATABASE, TABLE, partitionPredicate, source, replica, copierFactoryManager, eventIdFactory,
            targetTableLocation, DATABASE, TABLE, copierOptions, listener, dataManipulatorFactoryManager);
    underTest.replicate();

    verifyZeroInteractions(copier);

    InOrder callOrder = inOrder(sourceLocationManager, replica, replicaLocationManager, listener);
    callOrder.verify(replica).validateReplicaTable(DATABASE, TABLE);
    callOrder.verify(replica)
            .updateMetadata(EVENT_ID, sourceTableAndStatistics, DATABASE, TABLE, replicaLocationManager);
}
Example #6
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts binary column statistics (max length 100, average length 22.2, 2 nulls) with a row
 * count of 4 and checks the resulting max value size, total size and null count; all other
 * statistics are expected to be empty.
 */
@Test
public void testBinaryStatsToColumnStatistics() {
    BinaryColumnStatsData input = new BinaryColumnStatsData();
    input.setMaxColLen(100);
    input.setAvgColLen(22.2);
    input.setNumNulls(2);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(4));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.of(100));
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.of(44));
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #7
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code StringColumnStatsData} with no fields set and checks that every statistic
 * in the result is empty.
 */
@Test
public void testEmptyStringColumnStatsData() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(new StringColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #8
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts string column statistics (max length 100, average length 23.333, 1 null, 20
 * distinct values) with a row count of 2 and checks the resulting max value size, total size,
 * null count and distinct value count; all typed range statistics are expected to be empty.
 */
@Test
public void testStringStatsToColumnStatistics() {
    StringColumnStatsData input = new StringColumnStatsData();
    input.setMaxColLen(100);
    input.setAvgColLen(23.333);
    input.setNumNulls(1);
    input.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(2));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.of(100));
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.of(23));
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(1));
}
Example #9
Source File: ThriftHiveMetastore.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts the given column statistics to metastore objects and stores them for a partition.
 *
 * <p>Only statistics whose column name is a key of {@code columns} are converted. If nothing
 * remains after filtering, no call is made to the overload that writes the statistics.
 *
 * @param identity identity passed through to the writing overload
 * @param databaseName database of the table
 * @param tableName table owning the partition
 * @param partitionName partition whose statistics are set
 * @param columns known columns and their Hive types, keyed by column name
 * @param columnStatistics statistics to store, keyed by column name
 * @param rowCount row count forwarded to {@code createMetastoreColumnStatistics}
 */
private void setPartitionColumnStatistics(
        HiveIdentity identity,
        String databaseName,
        String tableName,
        String partitionName,
        Map<String, HiveType> columns,
        Map<String, HiveColumnStatistics> columnStatistics,
        OptionalLong rowCount) {
    List<ColumnStatisticsObj> converted = columnStatistics.entrySet().stream()
            .filter(stat -> columns.containsKey(stat.getKey()))
            .map(stat -> createMetastoreColumnStatistics(
                    stat.getKey(), columns.get(stat.getKey()), stat.getValue(), rowCount))
            .collect(toImmutableList());
    if (converted.isEmpty()) {
        return;
    }
    setPartitionColumnStatistics(identity, databaseName, tableName, partitionName, converted);
}
Example #10
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code DateColumnStatsData} with no fields set and checks that the result carries
 * date statistics with empty bounds while every other statistic is empty.
 */
@Test
public void testEmptyDateStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(new DateColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(
            converted.getDateStatistics(),
            Optional.of(new DateStatistics(Optional.empty(), Optional.empty())));
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #11
Source File: ReplicaTest.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Updates the metadata of an existing replica view with an empty set of partitions.
 *
 * <p>Both source and replica tables are first converted to views. The test then expects the
 * alter-table service to be invoked for the existing replica table and no clean-up location
 * to be registered.
 */
@Test
public void alteringExistingPartitionedReplicaViewSucceeds() throws Exception, IOException {
    convertSourceTableToView();
    convertExistingReplicaTableToView();
    when(mockMetaStoreClient.getPartitionsByNames(
            DB_NAME, TABLE_NAME, Lists.newArrayList("c=one/d=two", "c=three/d=four")))
            .thenReturn(Arrays.asList(existingPartition));
    existingReplicaTable.getParameters().put(REPLICATION_EVENT.parameterName(), "previousEventId");

    replica.updateMetadata(
            EVENT_ID,
            tableAndStatistics,
            new PartitionsAndStatistics(
                    sourceTable.getPartitionKeys(),
                    Collections.<Partition>emptyList(),
                    Collections.<String, List<ColumnStatisticsObj>>emptyMap()),
            DB_NAME,
            TABLE_NAME,
            mockReplicaLocationManager);

    verify(alterTableService).alterTable(eq(mockMetaStoreClient), eq(existingReplicaTable), any(Table.class));
    verify(mockReplicaLocationManager, never()).addCleanUpLocation(anyString(), any(Path.class));
}
Example #12
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts date column statistics (low 1000, high 2000, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting date range, null count and distinct value count; all
 * other statistics are expected to be empty.
 */
@Test
public void testDateStatsToColumnStatistics() {
    DateColumnStatsData input = new DateColumnStatsData();
    input.setLowValue(new Date(1000));
    input.setHighValue(new Date(2000));
    input.setNumNulls(1);
    input.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(
            converted.getDateStatistics(),
            Optional.of(new DateStatistics(
                    Optional.of(LocalDate.ofEpochDay(1000)),
                    Optional.of(LocalDate.ofEpochDay(2000)))));
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
Example #13
Source File: MockThriftMetastoreClient.java From presto with Apache License 2.0 | 6 votes |
/**
 * Mock lookup of table column statistics.
 *
 * <p>Every call increments the access counter. When the mock is set to throw, a
 * {@code RuntimeException} is raised. Otherwise the test statistics are returned only for the
 * test database, test table and a column list containing exactly the test column.
 *
 * @param databaseName database to look up
 * @param tableName table to look up
 * @param columnNames columns whose statistics are requested
 * @return a single-element list holding the test statistics
 * @throws TException a {@code NoSuchObjectException} when any argument differs from the test
 *     fixture values
 */
@Override
public List<ColumnStatisticsObj> getTableColumnStatistics(
        String databaseName, String tableName, List<String> columnNames) throws TException {
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean isTestFixture = databaseName.equals(TEST_DATABASE)
            && tableName.equals(TEST_TABLE)
            && columnNames.equals(ImmutableList.of(TEST_COLUMN));
    if (!isTestFixture) {
        throw new NoSuchObjectException();
    }
    return ImmutableList.of(createTestStats());
}
Example #14
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts decimal column statistics (low 0, high 100, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting decimal range, null count and distinct value count;
 * all other statistics are expected to be empty.
 */
@Test
public void testDecimalStatsToColumnStatistics() {
    BigDecimal low = new BigDecimal("0");
    BigDecimal high = new BigDecimal("100");
    DecimalColumnStatsData input = new DecimalColumnStatsData();
    input.setLowValue(toMetastoreDecimal(low));
    input.setHighValue(toMetastoreDecimal(high));
    input.setNumNulls(1);
    input.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(
            converted.getDecimalStatistics(),
            Optional.of(new DecimalStatistics(Optional.of(low), Optional.of(high))));
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
Example #15
Source File: PartitionedTableMetadataMirrorReplicationTest.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Runs a metadata-mirror replication when the source returns no partitions and no statistics.
 *
 * <p>Expects the replica table to be validated and its metadata to be updated with some
 * {@code ReplicaLocationManager}.
 */
@Test
public void noMatchingPartitions() throws Exception {
    PartitionsAndStatistics noPartitions = new PartitionsAndStatistics(
            sourceTable.getPartitionKeys(),
            Collections.<Partition>emptyList(),
            Collections.<String, List<ColumnStatisticsObj>>emptyMap());
    when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(noPartitions);
    when(source.getLocationManager(sourceTable, Collections.<Partition>emptyList(), EVENT_ID, copierOptions))
            .thenReturn(sourceLocationManager);

    PartitionedTableMetadataMirrorReplication underTest = new PartitionedTableMetadataMirrorReplication(
            DATABASE, TABLE, partitionPredicate, source, replica, eventIdFactory, DATABASE, TABLE);
    underTest.replicate();

    verify(replica).validateReplicaTable(DATABASE, TABLE);
    verify(replica).updateMetadata(
            eq(EVENT_ID),
            eq(sourceTableAndStatistics),
            eq(DATABASE),
            eq(TABLE),
            any(ReplicaLocationManager.class));
}
Example #16
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code DoubleColumnStatsData} with no fields set and checks that the result
 * carries double statistics with empty bounds while every other statistic is empty.
 */
@Test
public void testEmptyDoubleStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(new DoubleColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(
            converted.getDoubleStatistics(),
            Optional.of(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty())));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #17
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts double column statistics (low 0, high 100, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting double range, null count and distinct value count;
 * all other statistics are expected to be empty.
 */
@Test
public void testDoubleStatsToColumnStatistics() {
    DoubleColumnStatsData input = new DoubleColumnStatsData();
    input.setLowValue(0);
    input.setHighValue(100);
    input.setNumNulls(1);
    input.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(
            converted.getDoubleStatistics(),
            Optional.of(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100))));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
Example #18
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code LongColumnStatsData} with no fields set and checks that the result carries
 * integer statistics with empty bounds while every other statistic is empty.
 */
@Test
public void testEmptyLongStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(new LongColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(
            converted.getIntegerStatistics(),
            Optional.of(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty())));
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #19
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts long column statistics (low 0, high 100, 1 null, 20 distinct values) with a row
 * count of 1000 and checks the resulting integer range, null count and distinct value count;
 * all other statistics are expected to be empty.
 */
@Test
public void testLongStatsToColumnStatistics() {
    LongColumnStatsData input = new LongColumnStatsData();
    input.setLowValue(0);
    input.setHighValue(100);
    input.setNumNulls(1);
    input.setNumDVs(20);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.of(1000));

    assertEquals(
            converted.getIntegerStatistics(),
            Optional.of(new IntegerStatistics(OptionalLong.of(0), OptionalLong.of(100))));
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
Example #20
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts boolean column statistics (100 trues, 10 falses, 0 nulls) without a row count and
 * checks the resulting boolean statistics and null count; all other statistics are expected
 * to be empty.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData input = new BooleanColumnStatsData();
    input.setNumTrues(100);
    input.setNumFalses(10);
    input.setNumNulls(0);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(input));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(
            converted.getBooleanStatistics(),
            Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10))));
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #21
Source File: MockThriftMetastoreClient.java From presto with Apache License 2.0 | 6 votes |
/**
 * Mock lookup of partition column statistics.
 *
 * <p>Every call increments the access counter. When the mock is set to throw, a
 * {@code RuntimeException} is raised. Otherwise the test statistics are returned only for the
 * test database, test table, a partition list containing exactly the first test partition and
 * a column list containing exactly the test column.
 *
 * @param databaseName database to look up
 * @param tableName table to look up
 * @param partitionNames partitions whose statistics are requested
 * @param columnNames columns whose statistics are requested
 * @return a map from the first test partition to a single-element list of test statistics
 * @throws TException a {@code NoSuchObjectException} when any argument differs from the test
 *     fixture values
 */
@Override
public Map<String, List<ColumnStatisticsObj>> getPartitionColumnStatistics(
        String databaseName, String tableName, List<String> partitionNames, List<String> columnNames)
        throws TException {
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean isTestFixture = databaseName.equals(TEST_DATABASE)
            && tableName.equals(TEST_TABLE)
            && partitionNames.equals(ImmutableList.of(TEST_PARTITION1))
            && columnNames.equals(ImmutableList.of(TEST_COLUMN));
    if (!isTestFixture) {
        throw new NoSuchObjectException();
    }
    return ImmutableMap.of(TEST_PARTITION1, ImmutableList.of(createTestStats()));
}
Example #22
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts boolean statistics constructed as {@code BooleanColumnStatsData(1L, -1L, 2L)} and
 * checks that the null count is 2 while the boolean statistics come back with both counts
 * empty; all other statistics are expected to be empty.
 */
@Test
public void testImpalaGeneratedBooleanStatistics() {
    BooleanColumnStatsData impalaStats = new BooleanColumnStatsData(1L, -1L, 2L);
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStats));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
    assertEquals(
            converted.getBooleanStatistics(),
            Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
}
Example #23
Source File: TestThriftMetastoreUtil.java From presto with Apache License 2.0 | 6 votes |
/**
 * Converts a {@code BooleanColumnStatsData} with no fields set and checks that the result
 * carries boolean statistics with both counts empty while every other statistic is empty.
 */
@Test
public void testEmptyBooleanStatsToColumnStatistics() {
    ColumnStatisticsObj statsObj =
            new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(new BooleanColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(
            converted.getBooleanStatistics(),
            Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Example #24
Source File: HiveEndpoint.java From circus-train with Apache License 2.0 | 5 votes |
public PartitionsAndStatistics getPartitions(Table table, String partitionPredicate, int maxPartitions) throws TException { try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) { List<Partition> partitions = null; if (Strings.isNullOrEmpty(partitionPredicate)) { partitions = client.listPartitions(table.getDbName(), table.getTableName(), (short) maxPartitions); } else { partitions = client.listPartitionsByFilter(table.getDbName(), table.getTableName(), partitionPredicate, (short) maxPartitions); } // Generate a list of partition names List<String> partitionNames = getPartitionNames(table.getPartitionKeys(), partitions); // Fetch the partition statistics List<String> columnNames = getColumnNames(table); Map<String, List<ColumnStatisticsObj>> statisticsByPartitionName = client .getPartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionNames, columnNames); if (statisticsByPartitionName != null && !statisticsByPartitionName.isEmpty()) { log.debug("Retrieved column stats entries for {} partitions of table {}.{}", statisticsByPartitionName.size(), table.getDbName(), table.getTableName()); } else { log.debug("No partition column stats retrieved for table {}.{}", table.getDbName(), table.getTableName()); } return new PartitionsAndStatistics(table.getPartitionKeys(), partitions, statisticsByPartitionName); } }
Example #25
Source File: PartitionedTableMetadataUpdateReplicationTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Runs a metadata-update replication when the source returns no partitions and no statistics.
 *
 * <p>Expects the replica table to be validated and its metadata to be updated with some
 * {@code MetadataUpdateReplicaLocationManager}.
 */
@Test
public void noMatchingPartitions() throws Exception {
    PartitionsAndStatistics noPartitions = new PartitionsAndStatistics(
            sourceTable.getPartitionKeys(),
            Collections.<Partition>emptyList(),
            Collections.<String, List<ColumnStatisticsObj>>emptyMap());
    when(source.getPartitions(sourceTable, PARTITION_PREDICATE, MAX_PARTITIONS)).thenReturn(noPartitions);

    PartitionedTableMetadataUpdateReplication underTest = new PartitionedTableMetadataUpdateReplication(
            DATABASE, TABLE, partitionPredicate, source, replica, eventIdFactory, replicaLocation, DATABASE, TABLE);
    underTest.replicate();

    verify(replica).validateReplicaTable(DATABASE, TABLE);
    verify(replica).updateMetadata(
            eq(EVENT_ID),
            eq(sourceTableAndStatistics),
            eq(DATABASE),
            eq(TABLE),
            any(MetadataUpdateReplicaLocationManager.class));
}
Example #26
Source File: ReplicaTableFactoryTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates replica partition statistics from source statistics holding two column entries.
 *
 * <p>Expects the resulting descriptor to carry the mapped database and table names and the
 * partition name {@code one=A/two=B}, and the two column statistics objects to be kept in
 * their original order.
 */
@Test
public void newReplicaPartitionStatistics() throws MetaException {
    sourceTable.setPartitionKeys(
            Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

    Partition replicaPartition = new Partition(sourcePartition);
    replicaPartition.setDbName(MAPPED_DB_NAME);
    replicaPartition.setTableName(MAPPED_TABLE_NAME);
    replicaPartition.setValues(Arrays.asList("A", "B"));

    ColumnStatisticsObj firstStats = new ColumnStatisticsObj();
    ColumnStatisticsObj secondStats = new ColumnStatisticsObj();
    List<ColumnStatisticsObj> statsObjs = Arrays.asList(firstStats, secondStats);

    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
    statsDesc.setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), replicaPartition.getValues()));
    ColumnStatistics sourceStats = new ColumnStatistics(statsDesc, statsObjs);

    ColumnStatistics replicaStats =
            factory.newReplicaPartitionStatistics(sourceTable, replicaPartition, sourceStats);

    assertThat(replicaStats.getStatsDesc().getDbName(), is(MAPPED_DB_NAME));
    assertThat(replicaStats.getStatsDesc().getTableName(), is(MAPPED_TABLE_NAME));
    assertThat(replicaStats.getStatsDesc().getPartName(), is("one=A/two=B"));
    assertThat(replicaStats.getStatsObj().size(), is(2));
    assertThat(replicaStats.getStatsObj().get(0), is(firstStats));
    assertThat(replicaStats.getStatsObj().get(1), is(secondStats));
}
Example #27
Source File: ReplicaTableFactoryTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates replica partition statistics through a factory configured with identity table and
 * partition transformations and the test column statistics transformation.
 *
 * <p>Expects the resulting descriptor to carry {@code new_db}, {@code new_table} and
 * {@code part=newPart}, and the two column statistics objects to be kept in their original
 * order.
 */
@Test
public void newReplicaPartitionStatisticsWithTransformation() throws MetaException {
    sourceTable.setPartitionKeys(
            Arrays.asList(new FieldSchema("one", "string", null), new FieldSchema("two", "string", null)));

    Partition replicaPartition = new Partition(sourcePartition);
    replicaPartition.setDbName(MAPPED_DB_NAME);
    replicaPartition.setTableName(MAPPED_TABLE_NAME);
    replicaPartition.setValues(Arrays.asList("A", "B"));

    ColumnStatisticsObj firstStats = new ColumnStatisticsObj();
    ColumnStatisticsObj secondStats = new ColumnStatisticsObj();
    List<ColumnStatisticsObj> statsObjs = Arrays.asList(firstStats, secondStats);

    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
    statsDesc.setPartName(Warehouse.makePartName(sourceTable.getPartitionKeys(), replicaPartition.getValues()));
    ColumnStatistics sourceStats = new ColumnStatistics(statsDesc, statsObjs);

    ReplicaTableFactory transformingFactory = new ReplicaTableFactory(
            SOURCE_META_STORE_URIS,
            TableTransformation.IDENTITY,
            PartitionTransformation.IDENTITY,
            COLUMN_STATISTICS_TRANSFORMATION);

    ColumnStatistics replicaStats =
            transformingFactory.newReplicaPartitionStatistics(sourceTable, replicaPartition, sourceStats);

    assertThat(replicaStats.getStatsDesc().getDbName(), is("new_db"));
    assertThat(replicaStats.getStatsDesc().getTableName(), is("new_table"));
    assertThat(replicaStats.getStatsDesc().getPartName(), is("part=newPart"));
    assertThat(replicaStats.getStatsObj().size(), is(2));
    assertThat(replicaStats.getStatsObj().get(0), is(firstStats));
    assertThat(replicaStats.getStatsObj().get(1), is(secondStats));
}
Example #28
Source File: SourceTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Fetches a table whose column statistics lookup returns an empty list and expects the
 * returned table to be the stubbed one with null statistics.
 */
@Test
public void getTableNoStats() throws Exception {
    when(metaStoreClient.getTable(DATABASE, TABLE)).thenReturn(table);
    when(metaStoreClient.getTableColumnStatistics(DATABASE, TABLE, COLUMN_NAMES))
            .thenReturn(Collections.<ColumnStatisticsObj> emptyList());

    TableAndStatistics result = source.getTableAndStatistics(DATABASE, TABLE);

    assertThat(result.getTable(), is(table));
    assertThat(result.getStatistics(), is(nullValue()));
}
Example #29
Source File: SourceTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Fetches partitions when the partition column statistics lookup returns an empty map and
 * expects the stubbed partitions back with null statistics for the test partition.
 */
@Test
public void getPartitionsNoStats() throws Exception {
    when(metaStoreClient.listPartitionsByFilter(DATABASE, TABLE, PARTITION_PREDICATE, (short) MAX_PARTITIONS))
            .thenReturn(partitions);
    when(metaStoreClient.getPartitionColumnStatistics(DATABASE, TABLE, PARTITION_NAMES, COLUMN_NAMES))
            .thenReturn(Collections.<String, List<ColumnStatisticsObj>> emptyMap());

    PartitionsAndStatistics result = source.getPartitions(table, PARTITION_PREDICATE, MAX_PARTITIONS);

    assertThat(result.getPartitions(), is(partitions));
    assertThat(result.getStatisticsForPartition(partition), is(nullValue()));
}
Example #30
Source File: TestUtils.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates an external, unpartitioned table in the metastore and attaches long column
 * statistics for its {@code id} column.
 *
 * <p>The storage descriptor uses {@code DATA_COLUMNS}, the given location, text input and
 * output formats and the OpenCSV serde.
 *
 * @param metaStoreClient client used to create the table and update its column statistics
 * @param database database in which the table is created
 * @param table name of the table
 * @param location storage location of the table
 * @return the table object that was passed to the metastore
 * @throws TException if a metastore client call fails
 */
public static Table createUnpartitionedTable(
        HiveMetaStoreClient metaStoreClient, String database, String table, URI location)
        throws TException {
    Table hiveTable = new Table();
    hiveTable.setDbName(database);
    hiveTable.setTableName(table);
    hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
    hiveTable.putToParameters("EXTERNAL", "TRUE");

    StorageDescriptor storage = new StorageDescriptor();
    storage.setCols(DATA_COLUMNS);
    storage.setLocation(location.toString());
    storage.setParameters(new HashMap<String, String>());
    storage.setInputFormat(TextInputFormat.class.getName());
    storage.setOutputFormat(TextOutputFormat.class.getName());
    storage.setSerdeInfo(new SerDeInfo());
    storage.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");
    hiveTable.setSd(storage);

    metaStoreClient.createTable(hiveTable);

    ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc(true, database, table);
    ColumnStatisticsData idData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
    List<ColumnStatisticsObj> columnStats =
            Collections.singletonList(new ColumnStatisticsObj("id", "bigint", idData));
    metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(descriptor, columnStats));

    return hiveTable;
}