Java Code Examples for org.apache.hadoop.hive.metastore.api.Partition#getSd()
The following examples show how to use
org.apache.hadoop.hive.metastore.api.Partition#getSd(). Each example notes the project and source file it was taken from.
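Before the project examples, here is a minimal sketch of the pattern they all share: getSd() returns the partition's StorageDescriptor (physical location, columns, formats, serde), and it may be null, so callers should check before dereferencing. The class and method names in this sketch are ours for illustration; only the Partition and StorageDescriptor calls come from the Hive metastore API.

import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

// Hypothetical helper: read the storage location of a partition, if any.
public final class PartitionSdSketch {

    static String locationOrNull(Partition partition) {
        StorageDescriptor sd = partition.getSd();    // may be null on a bare Partition
        return sd == null ? null : sd.getLocation(); // e.g. an HDFS or S3 directory
    }
}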
Example 1
Source File: CopyPartitionsOperation.java From circus-train with Apache License 2.0
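This example copies partitions from one table to another in batches; each copy gets a new StorageDescriptor cloned from partition.getSd(), with its columns replaced by the target table's schema.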
/**
 * Copies partitions from oldTable to newTable; the copied partitions are modified to take the schema of newTable.
 */
public void execute(CloseableMetaStoreClient client, Table oldTable, Table newTable) throws TException {
    int count = 0;
    String databaseName = newTable.getDbName();
    String tableName = newTable.getTableName();
    PartitionIterator partitionIterator = new PartitionIterator(client, oldTable, partitionBatchSize);
    while (partitionIterator.hasNext()) {
        List<Partition> batch = new ArrayList<>();
        for (int i = 0; i < partitionBatchSize && partitionIterator.hasNext(); i++) {
            Partition partition = partitionIterator.next();
            count++;
            Partition copy = new Partition(partition);
            copy.setDbName(databaseName);
            copy.setTableName(tableName);
            StorageDescriptor sd = new StorageDescriptor(partition.getSd());
            sd.setCols(newTable.getSd().getCols());
            copy.setSd(sd);
            batch.add(copy);
        }
        LOG.info("Copying batch of size {} to {}.{}", batch.size(), databaseName, tableName);
        client.add_partitions(batch);
    }
    LOG.info("Copied {} partitions to {}.{}", count, databaseName, tableName);
}
Example 2
Source File: SentryMetastorePostEventListener.java From incubator-sentry with Apache License 2.0
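This listener syncs newly added partition paths to Sentry, null-checking both the StorageDescriptor and its location before registering the path with each plugin.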
@Override
public void onAddPartition(AddPartitionEvent partitionEvent) throws MetaException {
    // don't sync path if the operation has failed
    if (!partitionEvent.getStatus()) {
        LOGGER.debug("Skip syncing path with Sentry server for onAddPartition event,"
            + " since the operation failed. \n");
        return;
    }
    for (Partition part : partitionEvent.getPartitions()) {
        if (part.getSd() != null && part.getSd().getLocation() != null) {
            String authzObj = part.getDbName() + "." + part.getTableName();
            String path = part.getSd().getLocation();
            for (SentryMetastoreListenerPlugin plugin : sentryPlugins) {
                plugin.addPath(authzObj, path);
            }
        }
    }
    super.onAddPartition(partitionEvent);
}
Example 3
Source File: SentryMetastorePostEventListenerV2.java From incubator-sentry with Apache License 2.0
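The V2 listener performs the same sync but walks the event's partition iterator instead of a partition list.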
@Override
public void onAddPartition(AddPartitionEvent partitionEvent) throws MetaException {
    if (partitionEvent != null && partitionEvent.getPartitionIterator() != null) {
        Iterator<Partition> it = partitionEvent.getPartitionIterator();
        while (it.hasNext()) {
            Partition part = it.next();
            if (part.getSd() != null && part.getSd().getLocation() != null) {
                String authzObj = part.getDbName() + "." + part.getTableName();
                String path = part.getSd().getLocation();
                for (SentryMetastoreListenerPlugin plugin : sentryPlugins) {
                    plugin.addPath(authzObj, path);
                }
            }
        }
    }
}
Example 4
Source File: SentryMetastorePostEventListenerV2.java From incubator-sentry with Apache License 2.0
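The drop-partition counterpart: each dropped partition's location is removed from the Sentry plugins.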
@Override
public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException {
    if (partitionEvent != null && partitionEvent.getPartitionIterator() != null) {
        String authzObj = partitionEvent.getTable().getDbName()
            + "." + partitionEvent.getTable().getTableName();
        Iterator<Partition> it = partitionEvent.getPartitionIterator();
        while (it.hasNext()) {
            Partition part = it.next();
            if (part.getSd() != null && part.getSd().getLocation() != null) {
                String path = part.getSd().getLocation();
                for (SentryMetastoreListenerPlugin plugin : sentryPlugins) {
                    plugin.removePath(authzObj, path);
                }
            }
        }
    }
}
Example 5
Source File: ReplicaTableFactory.java From circus-train with Apache License 2.0
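This factory method builds a replica partition, repointing the replica's StorageDescriptor at the new location and recording the source partition's location among the replication parameters.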
Partition newReplicaPartition(
        String eventId,
        Table sourceTable,
        Partition sourcePartition,
        String replicaDatabaseName,
        String replicaTableName,
        Path replicaPartitionLocation,
        ReplicationMode replicationMode) {
    Partition replica = partitionTransformation.transform(new Partition(sourcePartition));
    replica.setDbName(replicaDatabaseName);
    replica.setTableName(replicaTableName);
    if (replica.getSd() != null) {
        replica.getSd().setLocation(toStringOrNull(replicaPartitionLocation));
    }
    String sourcePartitionLocation = sourcePartition.getSd() == null
        ? ""
        : toStringOrEmpty(sourcePartition.getSd().getLocation());
    // Statistic specific parameters
    replica.putToParameters(STATS_GENERATED_VIA_STATS_TASK, Boolean.TRUE.toString());
    replica.putToParameters(STATS_GENERATED, Boolean.TRUE.toString());
    replica.putToParameters(DO_NOT_UPDATE_STATS, Boolean.TRUE.toString());
    // Replication specific parameters
    replica.putToParameters(LAST_REPLICATED.parameterName(), DateTime.now(DateTimeZone.UTC).toString());
    replica.putToParameters(REPLICATION_EVENT.parameterName(), eventId);
    replica.putToParameters(SOURCE_LOCATION.parameterName(), sourcePartitionLocation);
    replica.putToParameters(SOURCE_TABLE.parameterName(), Warehouse.getQualifiedName(sourceTable));
    replica.putToParameters(SOURCE_METASTORE.parameterName(), sourceMetaStoreUris);
    replica.putToParameters(REPLICATION_MODE.parameterName(), replicationMode.name());
    return replica;
}
Example 6
Source File: HiveConnectorPartitionService.java From metacat with Apache License 2.0
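This helper resolves a partition's URI, reading the location from the Hive partition's StorageDescriptor when present and falling back to the Metacat PartitionInfo's serde URI otherwise.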
private String getPartitionUri(final PartitionHolder partition) {
    String result = null;
    if (partition.getPartition() != null) {
        final Partition hivePartition = partition.getPartition();
        result = hivePartition.getSd() != null ? hivePartition.getSd().getLocation() : null;
    } else if (partition.getPartitionInfo() != null) {
        final PartitionInfo partitionInfo = partition.getPartitionInfo();
        result = partitionInfo.getSerde() != null ? partitionInfo.getSerde().getUri() : null;
    }
    return result;
}
Example 7
Source File: HiveConnectorPartitionService.java From metacat with Apache License 2.0
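This method replaces each partition's StorageDescriptor with a deep copy of the table's, carrying over the partition-specific location, formats, parameters, and serde settings.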
private void copyTableSdToPartitionSd(final List<Partition> hivePartitions, final Table table) {
    //
    // Update the partition info based on that of the table.
    //
    for (Partition partition : hivePartitions) {
        final StorageDescriptor sd = partition.getSd();
        final StorageDescriptor tableSdCopy = table.getSd().deepCopy();
        if (tableSdCopy.getSerdeInfo() == null) {
            final SerDeInfo serDeInfo = new SerDeInfo(null, null, new HashMap<>());
            tableSdCopy.setSerdeInfo(serDeInfo);
        }
        tableSdCopy.setLocation(sd.getLocation());
        if (!Strings.isNullOrEmpty(sd.getInputFormat())) {
            tableSdCopy.setInputFormat(sd.getInputFormat());
        }
        if (!Strings.isNullOrEmpty(sd.getOutputFormat())) {
            tableSdCopy.setOutputFormat(sd.getOutputFormat());
        }
        if (sd.getParameters() != null && !sd.getParameters().isEmpty()) {
            tableSdCopy.setParameters(sd.getParameters());
        }
        if (sd.getSerdeInfo() != null) {
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getName())) {
                tableSdCopy.getSerdeInfo().setName(sd.getSerdeInfo().getName());
            }
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
                tableSdCopy.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
            }
            if (sd.getSerdeInfo().getParameters() != null && !sd.getSerdeInfo().getParameters().isEmpty()) {
                tableSdCopy.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
            }
        }
        partition.setSd(tableSdCopy);
    }
}
Example 8
Source File: HiveTableMetaStoreFactory.java From flink with Apache License 2.0
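This method realigns a partition's StorageDescriptor with the enclosing class's table-level descriptor (the sd field) for formats, serde, bucketing, and sort columns, then updates the location and calls alter_partition.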
private void alterPartition(
        LinkedHashMap<String, String> partitionSpec,
        Path partitionPath,
        Partition currentPartition) throws Exception {
    StorageDescriptor partSD = currentPartition.getSd();
    // the following logic copied from Hive::alterPartitionSpecInMemory
    partSD.setOutputFormat(sd.getOutputFormat());
    partSD.setInputFormat(sd.getInputFormat());
    partSD.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
    partSD.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
    partSD.setBucketCols(sd.getBucketCols());
    partSD.setNumBuckets(sd.getNumBuckets());
    partSD.setSortCols(sd.getSortCols());
    partSD.setLocation(partitionPath.toString());
    client.alter_partition(database, tableName, currentPartition);
}
Example 9
Source File: HiveTableSource.java From flink with Apache License 2.0
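This converter turns a metastore Partition into Flink's HiveTablePartition, passing partition.getSd() through and restoring typed values for the partition columns.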
public static HiveTablePartition toHiveTablePartition(
        List<String> partitionKeys,
        String[] fieldNames,
        DataType[] fieldTypes,
        HiveShim shim,
        Properties tableProps,
        String defaultPartitionName,
        Partition partition) {
    StorageDescriptor sd = partition.getSd();
    Map<String, Object> partitionColValues = new HashMap<>();
    List<String> nameList = Arrays.asList(fieldNames);
    for (int i = 0; i < partitionKeys.size(); i++) {
        String partitionColName = partitionKeys.get(i);
        String partitionValue = partition.getValues().get(i);
        DataType type = fieldTypes[nameList.indexOf(partitionColName)];
        Object partitionObject;
        if (defaultPartitionName.equals(partitionValue)) {
            LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
            // while this is inline with Hive, seems it should be null for string columns as well
            partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR
                ? defaultPartitionName : null;
        } else {
            partitionObject = restorePartitionValueFromFromType(shim, partitionValue, type);
        }
        partitionColValues.put(partitionColName, partitionObject);
    }
    return new HiveTablePartition(sd, partitionColValues, tableProps);
}
Example 10
Source File: HiveTableSource.java From flink with Apache License 2.0
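This method lists every partition of the table from the metastore and wraps each partition's StorageDescriptor into a HiveTablePartition; for non-partitioned tables it uses the table's own StorageDescriptor instead.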
private void initAllPartitions() {
    allHivePartitions = new ArrayList<>();
    // Please note that the following directly accesses Hive metastore, which is only a temporary workaround.
    // Ideally, we need to go thru Catalog API to get all info we need here, which requires some major
    // refactoring. We will postpone this until we merge Blink to Flink.
    try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(
            new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
        String dbName = tablePath.getDatabaseName();
        String tableName = tablePath.getObjectName();
        List<String> partitionColNames = catalogTable.getPartitionKeys();
        if (partitionColNames != null && partitionColNames.size() > 0) {
            final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
            List<Partition> partitions = client.listPartitions(dbName, tableName, (short) -1);
            for (Partition partition : partitions) {
                StorageDescriptor sd = partition.getSd();
                Map<String, Object> partitionColValues = new HashMap<>();
                Map<String, String> partitionSpec = new HashMap<>();
                for (int i = 0; i < partitionColNames.size(); i++) {
                    String partitionColName = partitionColNames.get(i);
                    String partitionValue = partition.getValues().get(i);
                    partitionSpec.put(partitionColName, partitionValue);
                    DataType type = catalogTable.getSchema().getFieldDataType(partitionColName).get();
                    Object partitionObject;
                    if (defaultPartitionName.equals(partitionValue)) {
                        LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
                        // while this is inline with Hive, seems it should be null for string columns as well
                        partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR
                            ? defaultPartitionName : null;
                    } else {
                        partitionObject = restorePartitionValueFromFromType(partitionValue, type);
                    }
                    partitionColValues.put(partitionColName, partitionObject);
                }
                HiveTablePartition hiveTablePartition = new HiveTablePartition(sd, partitionColValues);
                allHivePartitions.add(hiveTablePartition);
                partitionList.add(partitionSpec);
                partitionSpec2HiveTablePartition.put(partitionSpec, hiveTablePartition);
            }
        } else {
            allHivePartitions.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd(), null));
        }
    } catch (TException e) {
        throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
    }
    initAllPartitions = true;
}
Example 11
Source File: HiveBatchSource.java From Alink with Apache License 2.0
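Alink's variant of the previous example; when remainingPartitions is set, it fetches only that pre-selected subset individually from the metastore instead of listing all partitions.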
private List<HiveTablePartition> initAllPartitions() {
    List<HiveTablePartition> allHivePartitions = new ArrayList<>();
    // Please note that the following directly accesses Hive metastore, which is only a temporary workaround.
    // Ideally, we need to go thru Catalog API to get all info we need here, which requires some major
    // refactoring. We will postpone this until we merge Blink to Flink.
    try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(
            new HiveConf(jobConf, HiveConf.class), hiveVersion)) {
        String dbName = tablePath.getDatabaseName();
        String tableName = tablePath.getObjectName();
        List<String> partitionColNames = catalogTable.getPartitionKeys();
        if (partitionColNames != null && partitionColNames.size() > 0) {
            final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
            List<Partition> partitions = new ArrayList<>();
            if (remainingPartitions != null) {
                for (Map<String, String> spec : remainingPartitions) {
                    partitions.add(client.getPartition(dbName, tableName,
                        partitionSpecToValues(spec, partitionColNames)));
                }
            } else {
                partitions.addAll(client.listPartitions(dbName, tableName, (short) -1));
            }
            for (Partition partition : partitions) {
                StorageDescriptor sd = partition.getSd();
                Map<String, Object> partitionColValues = new HashMap<>();
                for (int i = 0; i < partitionColNames.size(); i++) {
                    String partitionColName = partitionColNames.get(i);
                    String partitionValue = partition.getValues().get(i);
                    DataType type = catalogTable.getSchema().getFieldDataType(partitionColName).get();
                    Object partitionObject;
                    if (defaultPartitionName.equals(partitionValue)) {
                        LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
                        // while this is inline with Hive, seems it should be null for string columns as well
                        partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR
                            ? defaultPartitionName : null;
                    } else {
                        partitionObject = restorePartitionValueFromFromType(partitionValue, type);
                    }
                    partitionColValues.put(partitionColName, partitionObject);
                }
                HiveTablePartition hiveTablePartition = new HiveTablePartition(sd, partitionColValues);
                allHivePartitions.add(hiveTablePartition);
            }
        } else {
            allHivePartitions.add(new HiveTablePartition(client.getTable(dbName, tableName).getSd()));
        }
    } catch (TException e) {
        throw new FlinkHiveException("Failed to collect all partitions from hive metaStore", e);
    }
    return allHivePartitions;
}
Example 12
Source File: HiveConvertersImpl.java From metacat with Apache License 2.0
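This converter builds a Hive Partition from a Metacat PartitionDto, then uses result.getSd() to fill in serde and column information from the table DTO.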
/**
 * {@inheritDoc}
 */
@Override
public Partition metacatToHivePartition(final PartitionDto partitionDto, @Nullable final TableDto tableDto) {
    final Partition result = new Partition();
    final QualifiedName name = partitionDto.getName();
    List<String> values = Lists.newArrayListWithCapacity(16);
    String databaseName = null;
    String tableName = null;
    if (name != null) {
        if (name.getPartitionName() != null) {
            //
            // Unescape the partition name to get the right partition values.
            // Partition names are always escaped, whereas the partition values are not.
            //
            values = getPartValsFromName(tableDto, name.getPartitionName());
        }
        if (name.getDatabaseName() != null) {
            databaseName = name.getDatabaseName();
        }
        if (name.getTableName() != null) {
            tableName = name.getTableName();
        }
    }
    result.setValues(values);
    result.setDbName(databaseName);
    result.setTableName(tableName);
    Map<String, String> metadata = partitionDto.getMetadata();
    if (metadata == null) {
        metadata = Maps.newHashMap();
    }
    result.setParameters(metadata);
    result.setSd(fromStorageDto(partitionDto.getSerde(), tableName));
    final StorageDescriptor sd = result.getSd();
    if (tableDto != null) {
        if (sd.getSerdeInfo() != null && tableDto.getSerde() != null
                && Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
            sd.getSerdeInfo().setSerializationLib(tableDto.getSerde().getSerializationLib());
        }
        final List<FieldDto> fields = tableDto.getFields();
        if (fields == null) {
            sd.setCols(Collections.emptyList());
        } else {
            sd.setCols(fields.stream()
                .filter(field -> !field.isPartition_key())
                .map(this::metacatToHiveField)
                .collect(Collectors.toList()));
        }
    }
    final AuditDto auditDto = partitionDto.getAudit();
    if (auditDto != null) {
        if (auditDto.getCreatedDate() != null) {
            result.setCreateTime(dateToEpochSeconds(auditDto.getCreatedDate()));
        }
        if (auditDto.getLastModifiedDate() != null) {
            result.setLastAccessTime(dateToEpochSeconds(auditDto.getLastModifiedDate()));
        }
    }
    return result;
}