org.apache.hadoop.hive.metastore.api.StorageDescriptor Java Exaples

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckTableSchemaMappingMissingColumn() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$,hashMap:hashMap");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("col3", "string", ""));
  cols.add(new FieldSchema("hashMap", "map<string,string>", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("Could not find column mapping for column: col2");
  storageHandler.checkTableSchemaMapping(description, table);
}

Source File: HiveTableSink.java From flink with Apache License 2.0

6 votes

private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns,
		StorageDescriptor sd) {
	String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
	int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
	String[] formatNames = new String[formatFieldCount];
	LogicalType[] formatTypes = new LogicalType[formatFieldCount];
	for (int i = 0; i < formatFieldCount; i++) {
		formatNames[i] = tableSchema.getFieldName(i).get();
		formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
	}
	RowType formatType = RowType.of(formatTypes, formatNames);
	Configuration formatConf = new Configuration(jobConf);
	sd.getSerdeInfo().getParameters().forEach(formatConf::set);
	if (serLib.contains("parquet")) {
		return Optional.of(ParquetRowDataBuilder.createWriterFactory(
				formatType, formatConf, hiveVersion.startsWith("3.")));
	} else if (serLib.contains("orc")) {
		TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
		return Optional.of(hiveShim.createOrcBulkWriterFactory(
				formatConf, typeDescription.toString(), formatTypes));
	} else {
		return Optional.empty();
	}
}

Source File: HiveToCatalogConverter.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0

6 votes

public static com.amazonaws.services.glue.model.StorageDescriptor convertStorageDescriptor(
        StorageDescriptor hiveSd) {
  com.amazonaws.services.glue.model.StorageDescriptor catalogSd =
          new com.amazonaws.services.glue.model.StorageDescriptor();
  catalogSd.setNumberOfBuckets(hiveSd.getNumBuckets());
  catalogSd.setCompressed(hiveSd.isCompressed());
  catalogSd.setParameters(hiveSd.getParameters());
  catalogSd.setBucketColumns(hiveSd.getBucketCols());
  catalogSd.setColumns(convertFieldSchemaList(hiveSd.getCols()));
  catalogSd.setInputFormat(hiveSd.getInputFormat());
  catalogSd.setLocation(hiveSd.getLocation());
  catalogSd.setOutputFormat(hiveSd.getOutputFormat());
  catalogSd.setSerdeInfo(convertSerDeInfo(hiveSd.getSerdeInfo()));
  catalogSd.setSkewedInfo(convertSkewedInfo(hiveSd.getSkewedInfo()));
  catalogSd.setSortColumns(convertOrderList(hiveSd.getSortCols()));
  catalogSd.setStoredAsSubDirectories(hiveSd.isStoredAsSubDirectories());

  return catalogSd;
}

Source File: ReplicaTest.java From circus-train with Apache License 2.0

6 votes

private Table newTable() {
  Table table = new Table();
  table.setDbName(DB_NAME);
  table.setTableName(TABLE_NAME);
  table.setTableType(TableType.EXTERNAL_TABLE.name());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation(tableLocation);
  table.setSd(sd);

  HashMap<String, String> parameters = new HashMap<>();
  parameters.put(StatsSetupConst.ROW_COUNT, "1");
  table.setParameters(parameters);

  table.setPartitionKeys(PARTITIONS);
  return table;
}

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckTableSchemaTypeMappingInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,hashKey:hashKey");
  parameters.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, "col2:NS");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The DynamoDB type NS does not support Hive type bigint");
  storageHandler.checkTableSchemaType(description, table);
}

Source File: ThriftMetastoreUtil.java From presto with Apache License 2.0

6 votes

public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema)
{
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(schema.stream()
                    .map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toImmutableList()))
            .setParameters(partition.getParameters());

    // TODO is bucketing_version set on partition level??
    fromMetastoreApiStorageDescriptor(
            partition.getParameters(),
            storageDescriptor,
            partitionBuilder.getStorageBuilder(),
            format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
}

Source File: DestructiveReplicaTest.java From circus-train with Apache License 2.0

6 votes

@Before
public void setUp() {
  SourceTable sourceTable = new SourceTable();
  sourceTable.setDatabaseName(DATABASE);
  sourceTable.setTableName(TABLE);
  tableReplication.setSourceTable(sourceTable);
  ReplicaTable replicaTable = new ReplicaTable();
  replicaTable.setDatabaseName(DATABASE);
  replicaTable.setTableName(REPLICA_TABLE);
  tableReplication.setReplicaTable(replicaTable);
  when(replicaMetaStoreClientSupplier.get()).thenReturn(client);
  replica = new DestructiveReplica(replicaMetaStoreClientSupplier, cleanupLocationManager, tableReplication);

  table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(REPLICA_TABLE);
  table.setPartitionKeys(Lists.newArrayList(new FieldSchema("part1", "string", "")));
  Map<String, String> parameters = new HashMap<>();
  parameters.put(CircusTrainTableParameter.SOURCE_TABLE.parameterName(), DATABASE + "." + TABLE);
  parameters.put(REPLICATION_EVENT.parameterName(), EVENT_ID);
  table.setParameters(parameters);
  StorageDescriptor sd1 = new StorageDescriptor();
  sd1.setLocation(tableLocation.toString());
  table.setSd(sd1);
}

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckTableSchemaTypeInvalidType() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "tinyint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type tinyint is not supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}

Source File: HiveUtils.java From kite with Apache License 2.0

6 votes

static Table createEmptyTable(String namespace, String name) {
  Table table = new Table();
  table.setDbName(namespace);
  table.setTableName(name);
  table.setPartitionKeys(new ArrayList<FieldSchema>());
  table.setParameters(new HashMap<String, String>());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo());
  sd.setNumBuckets(-1);
  sd.setBucketCols(new ArrayList<String>());
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setParameters(new HashMap<String, String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  SkewedInfo skewInfo = new SkewedInfo();
  skewInfo.setSkewedColNames(new ArrayList<String>());
  skewInfo.setSkewedColValues(new ArrayList<List<String>>());
  skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  sd.setSkewedInfo(skewInfo);
  table.setSd(sd);

  return table;
}

Source File: HiveCatalog.java From flink with Apache License 2.0

6 votes

private Partition instantiateHivePartition(Table hiveTable, CatalogPartitionSpec partitionSpec, CatalogPartition catalogPartition)
		throws PartitionSpecInvalidException {
	List<String> partCols = getFieldNames(hiveTable.getPartitionKeys());
	List<String> partValues = getOrderedFullPartitionValues(
		partitionSpec, partCols, new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()));
	// validate partition values
	for (int i = 0; i < partCols.size(); i++) {
		if (StringUtils.isNullOrWhitespaceOnly(partValues.get(i))) {
			throw new PartitionSpecInvalidException(getName(), partCols,
				new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName()), partitionSpec);
		}
	}
	// TODO: handle GenericCatalogPartition
	StorageDescriptor sd = hiveTable.getSd().deepCopy();
	sd.setLocation(catalogPartition.getProperties().remove(HiveCatalogConfig.PARTITION_LOCATION));

	Map<String, String> properties = new HashMap<>(catalogPartition.getProperties());
	properties.put(HiveCatalogConfig.COMMENT, catalogPartition.getComment());

	return HiveTableUtil.createHivePartition(
			hiveTable.getDbName(),
			hiveTable.getTableName(),
			partValues,
			sd,
			properties);
}

Source File: ThriftMetastoreUtil.java From presto with Apache License 2.0

6 votes

private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}

Source File: DiffGeneratedPartitionPredicateTest.java From circus-train with Apache License 2.0

6 votes

private void setupHiveTables() throws TException, IOException {
  List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2"));

  File tableLocation = new File("db1", "table1");
  StorageDescriptor sd = newStorageDescriptor(tableLocation, "col0");
  table1 = newTable("table1", "db1", partitionKeys, sd);
  Partition partition1 = newPartition(table1, "value1", "value2");
  Partition partition2 = newPartition(table1, "value11", "value22");
  table1Partitions = Arrays.asList(partition1, partition2); //
  table1PartitionNames = Arrays
      .asList(Warehouse.makePartName(partitionKeys, partition1.getValues()),
          Warehouse.makePartName(partitionKeys, partition2.getValues()));

  File tableLocation2 = new File("db2", "table2");
  StorageDescriptor sd2 = newStorageDescriptor(tableLocation2, "col0");
  table2 = newTable("table2", "db2", partitionKeys, sd2);
}

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "string", ""));
  cols.add(new FieldSchema("col2", "bigint", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}

Source File: HiveTableUtil.java From flink with Apache License 2.0

6 votes

/**
 * Create properties info to initialize a SerDe.
 * @param storageDescriptor
 * @return
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in previous Hive. We use a literal to save on shim
	properties.setProperty("column.name.delimite", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}

Source File: HiveAvroCopyEntityHelper.java From incubator-gobblin with Apache License 2.0

6 votes

/**
 * @param entity, name of the entity to be changed, e.g. hive table or partition
 * @param sd, StorageDescriptor of the entity
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL != null) {

    Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
    URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

    if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
        || (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
        && oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {

      String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
          Optional.<Partition>absent(), true).toString();

      sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
      log.info(String.format("For entity %s, change %s from %s to %s", entity,
          HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
    }
  }
}

Source File: HiveStatsUtil.java From flink with Apache License 2.0

6 votes

private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData =
					getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckStructTableSchemaTypeInvalid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
      "col2:dynamo_col2#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "struct<bignum:bigint,smallnum:tinyint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);

  exceptionRule.expect(MetaException.class);
  exceptionRule.expectMessage("The hive type struct<bignum:bigint,smallnum:tinyint> is not " +
      "supported in DynamoDB");
  storageHandler.checkTableSchemaType(description, table);
}

Source File: MultipleHiveFragmentsPerFileFragmenter.java From pxf with Apache License 2.0

6 votes

private String getFilePath(Table tbl) throws Exception {

        StorageDescriptor descTable = tbl.getSd();

        InputFormat<?, ?> fformat = HiveDataFragmenter.makeInputFormat(descTable.getInputFormat(), jobConf);

        FileInputFormat.setInputPaths(jobConf, new Path(descTable.getLocation()));

        InputSplit[] splits;
        try {
            splits = fformat.getSplits(jobConf, 1);
        } catch (org.apache.hadoop.mapred.InvalidInputException e) {
            LOG.debug("getSplits failed on " + e.getMessage());
            throw new RuntimeException("Unable to get file path for table.");
        }

        for (InputSplit split : splits) {
            FileSplit fsp = (FileSplit) split;
            String[] hosts = fsp.getLocations();
            String filepath = fsp.getPath().toString();
            return filepath;
        }
        throw new RuntimeException("Unable to get file path for table.");
    }

Source File: HiveServer2CoreTest.java From beeju with Apache License 2.0

6 votes

private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
  Table table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(tableName);
  table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}

Source File: AbstractMetastoreTestWithStaticConfiguration.java From incubator-sentry with Apache License 2.0

6 votes

public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(dbName);
  tbl.setTableName(tabName);
  StorageDescriptor sd = new StorageDescriptor();
  tbl.setSd(sd);
  tbl.setParameters(new HashMap<String, String>());
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().getParameters()
      .put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.setSortCols(new ArrayList<Order>());
  return tbl;
}

Source File: HiveServer2CoreTest.java From beeju with Apache License 2.0

6 votes

private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
    throws Exception {
  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}

Source File: DynamoDBStorageHandlerTest.java From emr-dynamodb-connector with Apache License 2.0

6 votes

@Test
public void testCheckListTableSchemaTypeValid() throws MetaException {
  TableDescription description = getHashRangeTable();

  Table table = new Table();
  Map<String, String> parameters = Maps.newHashMap();
  parameters.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, "col1:dynamo_col1$," +
          "col2:dynamo_col2#,col3:dynamo_col3#,col4:dynamo_col4#,col5:dynamo_col5#," +
          "col6:dynamo_col6#,col7:dynamo_col7#,hashKey:hashKey");
  table.setParameters(parameters);
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = Lists.newArrayList();
  cols.add(new FieldSchema("col1", "map<string,bigint>", ""));
  cols.add(new FieldSchema("col2", "array<map<string,bigint>>", ""));
  cols.add(new FieldSchema("col3", "array<map<string,double>>", ""));
  cols.add(new FieldSchema("col4", "array<map<string,string>>", ""));
  cols.add(new FieldSchema("col5", "array<bigint>", ""));
  cols.add(new FieldSchema("col6", "array<double>", ""));
  cols.add(new FieldSchema("col7", "array<string>", ""));
  cols.add(new FieldSchema("hashKey", "string", ""));
  sd.setCols(cols);
  table.setSd(sd);
  // This check is expected to pass for the given input
  storageHandler.checkTableSchemaType(description, table);
}

Source File: HiveConnectorTableService.java From metacat with Apache License 2.0

6 votes

private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}

Source File: DestructiveReplicaTest.java From circus-train with Apache License 2.0

5 votes

private Partition newPartition(String partitionValue, Path location1) {
  Partition partition = new Partition();
  partition.setValues(Lists.newArrayList(partitionValue));
  StorageDescriptor sd1 = new StorageDescriptor();
  sd1.setLocation(location1.toString());
  partition.setSd(sd1);
  Map<String, String> parameters = new HashMap<>();
  parameters.put(REPLICATION_EVENT.parameterName(), EVENT_ID);
  partition.setParameters(parameters);
  return partition;
}

Source File: ComparisonToolIntegrationTest.java From circus-train with Apache License 2.0

5 votes

private Partition newPartition(
    String table,
    StorageDescriptor tableStorageDescriptor,
    List<String> values,
    File location) {
  Partition partition = new Partition();
  partition.setDbName(DATABASE);
  partition.setTableName(table);
  partition.setValues(values);
  partition.setSd(new StorageDescriptor(tableStorageDescriptor));
  partition.getSd().setLocation(location.toURI().toString());
  return partition;
}

Source File: HiveTableMetaStoreFactory.java From flink with Apache License 2.0

5 votes

private void alterPartition(LinkedHashMap<String, String> partitionSpec, Path partitionPath,
		Partition currentPartition) throws Exception {
	StorageDescriptor partSD = currentPartition.getSd();
	// the following logic copied from Hive::alterPartitionSpecInMemory
	partSD.setOutputFormat(sd.getOutputFormat());
	partSD.setInputFormat(sd.getInputFormat());
	partSD.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
	partSD.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
	partSD.setBucketCols(sd.getBucketCols());
	partSD.setNumBuckets(sd.getNumBuckets());
	partSD.setSortCols(sd.getSortCols());
	partSD.setLocation(partitionPath.toString());
	client.alter_partition(database, tableName, currentPartition);
}

Source File: HdfsSnapshotLocationManagerTest.java From circus-train with Apache License 2.0

5 votes

@Test
public void calculateSubPathsUriEncodedPathAndPartition() {
  StorageDescriptor sd = new StorageDescriptor();
  sd.setLocation("hdfs://sandboxcluster/a%25b/partition1=url%25encoded.%3A");
  partition1.setSd(sd);

  List<Path> subPaths = HdfsSnapshotLocationManager.calculateSubPaths(Collections.singletonList(partition1),
      "hdfs://sandboxcluster/a%25b", "/b%25c");
  assertThat(subPaths.get(0).toString(), is("/b%25c/partition1=url%25encoded.%3A"));
}

Source File: HiveConnectorFastPartitionService.java From metacat with Apache License 2.0

5 votes

private void copyTableSdToPartitionInfoSd(final PartitionInfo partitionInfo, final Table table) {
    StorageInfo sd = partitionInfo.getSerde();
    //
    // Partitions can be provided in the request without the storage information.
    //
    if (sd == null) {
        sd = new StorageInfo();
        partitionInfo.setSerde(sd);
    }
    final StorageDescriptor tableSd = table.getSd();

    if (StringUtils.isBlank(sd.getInputFormat())) {
        sd.setInputFormat(tableSd.getInputFormat());
    }
    if (StringUtils.isBlank(sd.getOutputFormat())) {
        sd.setOutputFormat(tableSd.getOutputFormat());
    }
    if (sd.getParameters() == null || sd.getParameters().isEmpty()) {
        sd.setParameters(tableSd.getParameters());
    }
    final SerDeInfo tableSerde = tableSd.getSerdeInfo();
    if (tableSerde != null) {
        if (StringUtils.isBlank(sd.getSerializationLib())) {
            sd.setSerializationLib(tableSerde.getSerializationLib());
        }
        if (sd.getSerdeInfoParameters() == null || sd.getSerdeInfoParameters().isEmpty()) {
            sd.setSerdeInfoParameters(tableSerde.getParameters());
        }
    }
}

Source File: DatePatternUpdateProviderTest.java From incubator-gobblin with Apache License 2.0

5 votes

public static Partition createMockPartitionWithLocation(String location) {
  Partition mockPartition = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  org.apache.hadoop.hive.metastore.api.Partition mockTPartition =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Partition.class, Mockito.RETURNS_SMART_NULLS);
  StorageDescriptor mockSd = Mockito.mock(StorageDescriptor.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(mockSd.getLocation()).thenReturn(location);
  Mockito.when(mockTPartition.getSd()).thenReturn(mockSd);
  Mockito.when(mockPartition.getTPartition()).thenReturn(mockTPartition);
  return mockPartition;
}

Source File: MetastoreAuthzBinding.java From incubator-sentry with Apache License 2.0

5 votes

private String getSdLocation(StorageDescriptor sd) {
  if (sd == null) {
    return "";
  } else {
    return sd.getLocation();
  }
}

org.apache.hadoop.hive.metastore.api.StorageDescriptor Java Examples