Java Code Examples for org.apache.hadoop.hive.metastore.api.Table#getPartitionKeys()
The following examples show how to use
org.apache.hadoop.hive.metastore.api.Table#getPartitionKeys().
Each example is taken from an open source project; you can go to the original project or source file by following the link above each example.
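Before the examples, here is a minimal sketch (not taken from any of the projects below; the class and method names are illustrative only) of the defensive pattern most of them share: Table#getPartitionKeys() is a Thrift-generated getter that returns the table's partition columns as a List<FieldSchema>, and it can return null or an empty list for an unpartitioned table, so callers should check before iterating.

import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;

public class PartitionKeyInspector {

  /**
   * Prints each partition key's name and Hive type, guarding against the
   * null or empty list returned for unpartitioned tables.
   */
  public static void describePartitionKeys(Table table) {
    // getPartitionKeys() may be null for an unpartitioned table
    List<FieldSchema> partitionKeys = table.getPartitionKeys();
    if (partitionKeys == null || partitionKeys.isEmpty()) {
      System.out.println(table.getTableName() + " is not partitioned");
      return;
    }
    for (FieldSchema key : partitionKeys) {
      System.out.println(key.getName() + " : " + key.getType());
    }
  }
}

Several of the examples below follow exactly this pattern, either null-checking the returned list (Examples 10, 13, and 14) or reading the companion getter Table#getPartitionKeysSize() (Example 2).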
Example 1
Source File: MetastoreClientUtils.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0
/**
 * Taken from HiveMetaStore#create_table_core
 * https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L1370-L1383
 */
public static void validateTableObject(Table table, Configuration conf) throws InvalidObjectException {
  checkNotNull(table, "table cannot be null");
  checkNotNull(table.getSd(), "Table#StorageDescriptor cannot be null");

  if (!hiveShims.validateTableName(table.getTableName(), conf)) {
    throw new InvalidObjectException(table.getTableName() + " is not a valid object name");
  }
  String validate = MetaStoreUtils.validateTblColumns(table.getSd().getCols());
  if (validate != null) {
    throw new InvalidObjectException("Invalid column " + validate);
  }
  if (table.getPartitionKeys() != null) {
    validate = MetaStoreUtils.validateTblColumns(table.getPartitionKeys());
    if (validate != null) {
      throw new InvalidObjectException("Invalid partition column " + validate);
    }
  }
}
Example 2
Source File: HiveClientWrapper.java From pxf with Apache License 2.0
/**
 * Populates the given metadata object with the given table's fields and partitions.
 * The partition fields are added at the end of the table schema.
 * Throws an exception if the table contains unsupported field types.
 * Supported HCatalog types: TINYINT, SMALLINT, INT, BIGINT, BOOLEAN, FLOAT,
 * DOUBLE, STRING, BINARY, TIMESTAMP, DATE, DECIMAL, VARCHAR, CHAR.
 *
 * @param tbl      Hive table
 * @param metadata schema of given table
 */
public void getSchema(Table tbl, Metadata metadata) {
  int hiveColumnsSize = tbl.getSd().getColsSize();
  int hivePartitionsSize = tbl.getPartitionKeysSize();
  LOG.debug("Hive table: {} fields. {} partitions.", hiveColumnsSize, hivePartitionsSize);

  // check hive fields
  try {
    List<FieldSchema> hiveColumns = tbl.getSd().getCols();
    for (FieldSchema hiveCol : hiveColumns) {
      metadata.addField(HiveUtilities.mapHiveType(hiveCol));
    }
    // check partition fields
    List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
    for (FieldSchema hivePart : hivePartitions) {
      metadata.addField(HiveUtilities.mapHiveType(hivePart));
    }
  } catch (UnsupportedTypeException e) {
    String errorMsg = "Failed to retrieve metadata for table " + metadata.getItem() + ". "
        + e.getMessage();
    throw new UnsupportedTypeException(errorMsg);
  }
}
Example 3
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static List<PartitionValue> getPartitionValues(Table table, Partition partition, boolean enforceVarcharWidth) {
  if (partition == null) {
    return Collections.emptyList();
  }

  final List<String> partitionValues = partition.getValues();
  final List<PartitionValue> output = new ArrayList<>();
  final List<FieldSchema> partitionKeys = table.getPartitionKeys();
  for (int i = 0; i < partitionKeys.size(); i++) {
    final PartitionValue value =
        getPartitionValue(partitionKeys.get(i), partitionValues.get(i), enforceVarcharWidth);
    if (value != null) {
      output.add(value);
    }
  }
  return output;
}
Example 4
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static List<PartitionValue> getPartitionValues(Table table, Partition partition, boolean enableVarcharWidth) {
  if (partition == null) {
    return Collections.emptyList();
  }

  final List<String> partitionValues = partition.getValues();
  final List<PartitionValue> output = new ArrayList<>();
  final List<FieldSchema> partitionKeys = table.getPartitionKeys();
  for (int i = 0; i < partitionKeys.size(); i++) {
    final PartitionValue value =
        getPartitionValue(partitionKeys.get(i), partitionValues.get(i), enableVarcharWidth);
    if (value != null) {
      output.add(value);
    }
  }
  return output;
}
Example 5
Source File: PartitionUtil.java From metacat with Apache License 2.0
/**
 * Retrieves the partition values from the partition name. This method also validates the partition keys
 * against those of the table.
 *
 * @param tableQName table name
 * @param table      table
 * @param partName   partition name
 * @return list of partition values
 */
public static List<String> getPartValuesFromPartName(final QualifiedName tableQName, final Table table,
    final String partName) {
  if (Strings.isNullOrEmpty(partName)) {
    throw new InvalidMetaException(tableQName, partName, null);
  }
  final LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
  Warehouse.makeSpecFromName(partSpec, new Path(partName));
  final List<String> values = new ArrayList<>();
  for (FieldSchema field : table.getPartitionKeys()) {
    final String key = field.getName();
    final String val = partSpec.get(key);
    if (val == null) {
      throw new InvalidMetaException(tableQName, partName, null);
    }
    values.add(val);
  }
  return values;
}
Example 6
Source File: AvroHiveTableStrategyTest.java From data-highway with Apache License 2.0
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
Example 7
Source File: AvroHiveTableStrategyTest.java From data-highway with Apache License 2.0
@Test
public void alterHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  when(uriResolver.resolve(schema2, TABLE, 2))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table table = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);
  Table result = underTest.alterHiveTable(table, schema2, 2);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
Example 8
Source File: HiveDataFragmenter.java From pxf with Apache License 2.0
/**
 * Verifies that all the Greenplum-defined columns are present in the Hive
 * table schema, then returns a list of indexes corresponding to the
 * matching columns in Greenplum, ordered by the Greenplum schema order.
 *
 * @param tbl the hive table
 * @return a list of indexes
 */
List<Integer> verifySchema(Table tbl) {
  List<Integer> indexes = new ArrayList<>();
  List<FieldSchema> hiveColumns = tbl.getSd().getCols();
  List<FieldSchema> hivePartitions = tbl.getPartitionKeys();

  Set<String> columnAndPartitionNames =
      Stream.concat(hiveColumns.stream(), hivePartitions.stream())
          .map(FieldSchema::getName)
          .collect(Collectors.toSet());

  Map<String, Integer> columnNameToColsIndexMap =
      IntStream.range(0, hiveColumns.size())
          .boxed()
          .collect(Collectors.toMap(i -> hiveColumns.get(i).getName(), i -> i));

  for (ColumnDescriptor cd : context.getTupleDescription()) {
    if (!columnAndPartitionNames.contains(cd.columnName())
        && !columnAndPartitionNames.contains(cd.columnName().toLowerCase())) {
      throw new IllegalArgumentException(
          String.format("Column '%s' does not exist in the Hive schema. "
                  + "Ensure the column exists and check the column name spelling and case",
              cd.columnName()));
    }
    // The index of the column on the Hive schema
    Integer index = defaultIfNull(columnNameToColsIndexMap.get(cd.columnName()),
        columnNameToColsIndexMap.get(cd.columnName().toLowerCase()));
    indexes.add(index);
  }
  return indexes;
}
Example 9
Source File: HiveEndpoint.java From circus-train with Apache License 2.0
public PartitionsAndStatistics getPartitions(Table table, String partitionPredicate, int maxPartitions)
    throws TException {
  try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) {
    List<Partition> partitions = null;
    if (Strings.isNullOrEmpty(partitionPredicate)) {
      partitions = client.listPartitions(table.getDbName(), table.getTableName(), (short) maxPartitions);
    } else {
      partitions = client.listPartitionsByFilter(table.getDbName(), table.getTableName(), partitionPredicate,
          (short) maxPartitions);
    }

    // Generate a list of partition names
    List<String> partitionNames = getPartitionNames(table.getPartitionKeys(), partitions);

    // Fetch the partition statistics
    List<String> columnNames = getColumnNames(table);
    Map<String, List<ColumnStatisticsObj>> statisticsByPartitionName = client
        .getPartitionColumnStatistics(table.getDbName(), table.getTableName(), partitionNames, columnNames);
    if (statisticsByPartitionName != null && !statisticsByPartitionName.isEmpty()) {
      log.debug("Retrieved column stats entries for {} partitions of table {}.{}",
          statisticsByPartitionName.size(), table.getDbName(), table.getTableName());
    } else {
      log.debug("No partition column stats retrieved for table {}.{}", table.getDbName(), table.getTableName());
    }
    return new PartitionsAndStatistics(table.getPartitionKeys(), partitions, statisticsByPartitionName);
  }
}
Example 10
Source File: DestructiveReplica.java From circus-train with Apache License 2.0
private void dropAndDeletePartitions(CloseableMetaStoreClient client, Predicate<String> shouldDelete)
    throws MetaException, TException, NoSuchObjectException {
  Table replicaTable = client.getTable(databaseName, tableName);
  List<FieldSchema> partitionKeys = replicaTable.getPartitionKeys();
  if (partitionKeys == null || partitionKeys.isEmpty()) {
    // unpartitioned table, nothing to delete
    return;
  }

  PartitionIterator partitionIterator = new PartitionIterator(client, replicaTable, (short) 1000);
  while (partitionIterator.hasNext()) {
    Partition replicaPartition = partitionIterator.next();
    List<String> values = replicaPartition.getValues();
    String partitionName = Warehouse.makePartName(partitionKeys, values);
    if (shouldDelete.apply(partitionName)) {
      log.info("Dropping partition for replica table: " + databaseName + "." + tableName
          + ", partition value: '" + partitionName + "'");
      client.dropPartition(databaseName, tableName, partitionName, DELETE_DATA);
      Path oldLocation = locationAsPath(replicaPartition);
      String oldEventId = replicaPartition.getParameters().get(REPLICATION_EVENT.parameterName());
      cleanupLocationManager.addCleanupLocation(oldEventId, oldLocation);
    }
  }
}
Example 11
Source File: EventUtils.java From circus-train with Apache License 2.0
public static EventPartitions toEventPartitions(Table table, List<Partition> partitions) {
  LinkedHashMap<String, String> partitionKeyTypes = new LinkedHashMap<>();
  List<FieldSchema> partitionKeys = table.getPartitionKeys();
  for (FieldSchema partitionKey : partitionKeys) {
    partitionKeyTypes.put(partitionKey.getName(), partitionKey.getType());
  }
  EventPartitions eventPartitions = new EventPartitions(partitionKeyTypes);
  if (partitions != null) {
    for (Partition partition : partitions) {
      eventPartitions.add(new EventPartition(partition.getValues(),
          LocationUtils.hasLocation(partition) ? LocationUtils.locationAsUri(partition) : null));
    }
  }
  return eventPartitions;
}
Example 12
Source File: HiveConvertersImpl.java From metacat with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public TableDto hiveToMetacatTable(final QualifiedName name, final Table table) {
  final TableDto dto = new TableDto();
  dto.setSerde(toStorageDto(table.getSd(), table.getOwner()));
  dto.setAudit(new AuditDto());
  dto.setName(name);
  if (table.isSetCreateTime()) {
    dto.getAudit().setCreatedDate(epochSecondsToDate(table.getCreateTime()));
  }
  dto.setMetadata(table.getParameters());

  final List<FieldSchema> nonPartitionColumns = table.getSd().getCols();
  final List<FieldSchema> partitionColumns = table.getPartitionKeys();
  final List<FieldDto> allFields =
      Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
  nonPartitionColumns.stream()
      .map(field -> this.hiveToMetacatField(field, false))
      .forEachOrdered(allFields::add);
  partitionColumns.stream()
      .map(field -> this.hiveToMetacatField(field, true))
      .forEachOrdered(allFields::add);
  dto.setFields(allFields);
  dto.setView(new ViewDto(table.getViewOriginalText(), table.getViewExpandedText()));
  return dto;
}
Example 13
Source File: TableTypeFilter.java From incubator-gobblin with Apache License 2.0
@Override
public boolean apply(@Nullable Table input) {
  if (input == null) {
    return false;
  }
  switch (tableType) {
    case SNAPSHOT:
      return input.getPartitionKeys() == null || input.getPartitionKeys().size() == 0;
    case PARTITIONED:
      return input.getPartitionKeys() != null && input.getPartitionKeys().size() > 0;
    default:
      throw new UnsupportedOperationException("Invalid type: " + tableType);
  }
}
Example 14
Source File: HiveUtils.java From kite with Apache License 2.0
private static List<FieldSchema> getPartCols(Table table) {
  List<FieldSchema> partKeys = table.getPartitionKeys();
  if (partKeys == null) {
    partKeys = new ArrayList<FieldSchema>();
    table.setPartitionKeys(partKeys);
  }
  return partKeys;
}
Example 15
Source File: HiveConnectorInfoConverter.java From metacat with Apache License 2.0
/**
 * Converts a Hive table to TableInfo.
 *
 * @param name  qualified name of the table
 * @param table connector table
 * @return Metacat table info
 */
@Override
public TableInfo toTableInfo(final QualifiedName name, final Table table) {
  final List<FieldSchema> nonPartitionColumns =
      (table.getSd() != null) ? table.getSd().getCols() : Collections.emptyList();
  // add the data fields to the nonPartitionColumns
  // ignore all exceptions
  try {
    if (nonPartitionColumns.isEmpty()) {
      for (StructField field : HiveTableUtil.getTableStructFields(table)) {
        final FieldSchema fieldSchema = new FieldSchema(field.getFieldName(),
            field.getFieldObjectInspector().getTypeName(),
            field.getFieldComment());
        nonPartitionColumns.add(fieldSchema);
      }
    }
  } catch (final Exception e) {
    log.error(e.getMessage(), e);
  }

  final List<FieldSchema> partitionColumns = table.getPartitionKeys();
  final Date creationDate = table.isSetCreateTime() ? epochSecondsToDate(table.getCreateTime()) : null;
  final List<FieldInfo> allFields =
      Lists.newArrayListWithCapacity(nonPartitionColumns.size() + partitionColumns.size());
  nonPartitionColumns.stream()
      .map(field -> hiveToMetacatField(field, false))
      .forEachOrdered(allFields::add);
  partitionColumns.stream()
      .map(field -> hiveToMetacatField(field, true))
      .forEachOrdered(allFields::add);
  final AuditInfo auditInfo = AuditInfo.builder().createdDate(creationDate).build();

  if (null != table.getTableType() && table.getTableType().equals(TableType.VIRTUAL_VIEW.name())) {
    return TableInfo.builder()
        .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
        .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
        .view(ViewInfo.builder()
            .viewOriginalText(table.getViewOriginalText())
            .viewExpandedText(table.getViewExpandedText()).build())
        .build();
  } else {
    return TableInfo.builder()
        .serde(toStorageInfo(table.getSd(), table.getOwner())).fields(allFields)
        .metadata(table.getParameters()).name(name).auditInfo(auditInfo)
        .build();
  }
}