Java Code Examples for org.apache.hadoop.hive.metastore.api.Partition#putToParameters()
The following examples show how to use
org.apache.hadoop.hive.metastore.api.Partition#putToParameters().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSerDeTransformation.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Relocates the Avro schema referenced by a partition and points the partition at the new copy.
 *
 * <p>A {@code null} destination leaves the partition untouched. Otherwise the target folder is
 * {@code addTrailingSlash(avroSchemaDestination) + eventId}; the schema named by the partition's
 * {@code AVRO_SCHEMA_URL_PARAMETER} entry is handed to {@code copy(...)} together with that folder,
 * and the parameter is then overwritten with the folder, a {@code "/"} and the schema's file name.
 *
 * @param partition partition whose parameters are read and updated in place
 * @param avroSchemaDestination base destination for the schema, or {@code null} to skip the
 *     transformation
 * @param eventId appended to the destination to form the target folder
 * @return the same {@code partition} instance that was passed in
 * @throws Exception declared by the signature; presumably raised by {@code copy(...)} - confirm
 */
Partition apply(Partition partition, String avroSchemaDestination, String eventId) throws Exception {
  if (avroSchemaDestination != null) {
    String eventFolder = addTrailingSlash(avroSchemaDestination) + eventId;

    String currentSchemaUrl = partition.getParameters().get(AVRO_SCHEMA_URL_PARAMETER);
    copy(currentSchemaUrl, eventFolder);

    String relocatedSchemaUrl = eventFolder + "/" + getAvroSchemaFileName(currentSchemaUrl);
    partition.putToParameters(AVRO_SCHEMA_URL_PARAMETER, relocatedSchemaUrl);
    LOG.info("Avro SerDe transformation has been applied to partition '{}'", partition.toString());
  }
  return partition;
}
Example 2
Source File: HivePartitionManager.java From data-highway with Apache License 2.0 | 5 votes |
/**
 * Builds a new, unsaved Hive {@link Partition} for {@code tableName} in {@code databaseName}.
 *
 * <p>The caller's {@code parameters} are copied in first; {@code DATA_HIGHWAY_VERSION} and
 * {@code DATA_HIGHWAY_LAST_REVISION} are written afterwards. The last-revision value is the current
 * {@code clock} instant formatted with {@code ISO_OFFSET_DATE_TIME} in the {@code UTC} zone.
 *
 * @param tableName table the partition belongs to
 * @param partitionValues values set on the partition
 * @param location passed to {@code AvroStorageDescriptorFactory.create} to build the storage
 *     descriptor
 * @param parameters entries copied into the partition parameters; must not be {@code null}
 * @return the populated partition
 */
private Partition newHivePartition(
    String tableName,
    List<String> partitionValues,
    String location,
    Map<String, String> parameters) {
  Partition hivePartition = new Partition();
  hivePartition.setDbName(databaseName);
  hivePartition.setTableName(tableName);
  hivePartition.setValues(partitionValues);

  parameters.forEach(hivePartition::putToParameters);
  hivePartition.putToParameters(DATA_HIGHWAY_VERSION, DataHighwayVersion.VERSION);
  String lastRevision = ISO_OFFSET_DATE_TIME.withZone(UTC).format(clock.instant());
  hivePartition.putToParameters(DATA_HIGHWAY_LAST_REVISION, lastRevision);

  hivePartition.setSd(AvroStorageDescriptorFactory.create(location));
  return hivePartition;
}
Example 3
Source File: AddCheckSumReplicaTableFactory.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates the replica partition through the superclass and additionally records a checksum of the
 * source partition.
 *
 * <p>The checksum is {@code checksumFunction} applied to {@code locationAsPath(sourcePartition)} and
 * is stored under {@code PARTITION_CHECKSUM.parameterName()}.
 *
 * @return the partition produced by the superclass, with the checksum parameter added
 */
@Override
Partition newReplicaPartition(
    String eventId,
    Table sourceTable,
    Partition sourcePartition,
    String replicaDatabaseName,
    String replicaTableName,
    Path replicaPartitionLocation,
    ReplicationMode replicationMode) {
  Partition replicaPartition = super.newReplicaPartition(
      eventId,
      sourceTable,
      sourcePartition,
      replicaDatabaseName,
      replicaTableName,
      replicaPartitionLocation,
      replicationMode);

  String sourceChecksum = checksumFunction.apply(locationAsPath(sourcePartition));
  replicaPartition.putToParameters(PARTITION_CHECKSUM.parameterName(), sourceChecksum);
  return replicaPartition;
}
Example 4
Source File: ReplicaTableFactory.java From circus-train with Apache License 2.0 | 5 votes |
Partition newReplicaPartition( String eventId, Table sourceTable, Partition sourcePartition, String replicaDatabaseName, String replicaTableName, Path replicaPartitionLocation, ReplicationMode replicationMode) { Partition replica = partitionTransformation.transform(new Partition(sourcePartition)); replica.setDbName(replicaDatabaseName); replica.setTableName(replicaTableName); if (replica.getSd() != null) { replica.getSd().setLocation(toStringOrNull(replicaPartitionLocation)); } String sourcePartitionLocation = sourcePartition.getSd() == null ? "" : toStringOrEmpty(sourcePartition.getSd().getLocation()); // Statistic specific parameters replica.putToParameters(STATS_GENERATED_VIA_STATS_TASK, Boolean.TRUE.toString()); replica.putToParameters(STATS_GENERATED, Boolean.TRUE.toString()); replica.putToParameters(DO_NOT_UPDATE_STATS, Boolean.TRUE.toString()); // Replication specific parameters replica.putToParameters(LAST_REPLICATED.parameterName(), DateTime.now(DateTimeZone.UTC).toString()); replica.putToParameters(REPLICATION_EVENT.parameterName(), eventId); replica.putToParameters(SOURCE_LOCATION.parameterName(), sourcePartitionLocation); replica.putToParameters(SOURCE_TABLE.parameterName(), Warehouse.getQualifiedName(sourceTable)); replica.putToParameters(SOURCE_METASTORE.parameterName(), sourceMetaStoreUris); replica.putToParameters(REPLICATION_MODE.parameterName(), replicationMode.name()); return replica; }
Example 5
Source File: HiveDifferencesIntegrationTest.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Sets {@code DO_NOT_UPDATE_STATS}, {@code STATS_GENERATED_VIA_STATS_TASK} and
 * {@code STATS_GENERATED} on replica partition {@code part=1}, saves it, and verifies that running
 * {@code HiveDifferences} reports no table, partition or data change to the listener.
 */
@Test
public void replicaPartitionHasChangedButIgnorableParamter() throws Exception {
  Partition replicaPartition1 = catalog.client().getPartition(DATABASE, REPLICA_TABLE, "part=1");
  replicaPartition1.putToParameters("DO_NOT_UPDATE_STATS", "true");
  replicaPartition1.putToParameters("STATS_GENERATED_VIA_STATS_TASK", "true");
  replicaPartition1.putToParameters("STATS_GENERATED", "true");
  catalog.client().alter_partition(DATABASE, REPLICA_TABLE, replicaPartition1);

  Table sourceTable = catalog.client().getTable(DATABASE, SOURCE_TABLE);
  Table replicaTable = catalog.client().getTable(DATABASE, REPLICA_TABLE);
  // NOTE(review): replicaTable is written back without any local modification; confirm whether
  // the stats parameters were meant to be set on the table as well.
  catalog.client().alter_table(DATABASE, REPLICA_TABLE, replicaTable);

  HiveDifferences
      .builder(diffListener)
      .comparatorRegistry(comparatorRegistry)
      .source(configuration, sourceTable, new PartitionIterator(catalog.client(), sourceTable, PARTITION_BATCH_SIZE))
      .replica(Optional.of(replicaTable),
          Optional.of(new BufferedPartitionFetcher(catalog.client(), replicaTable, PARTITION_BATCH_SIZE)))
      .checksumFunction(checksumFunction)
      .build()
      .run();

  verify(diffListener, never()).onChangedTable(anyList());
  verify(diffListener, never()).onNewPartition(anyString(), any(Partition.class));
  verify(diffListener, never()).onChangedPartition(anyString(), any(Partition.class), anyList());
  verify(diffListener, never()).onDataChanged(anyString(), any(Partition.class));
}
Example 6
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
@Test public void partitionedTableMetadataMirror() throws Exception { helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); // adjusting the sourceTable, mimicking the change we want to update Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE); sourceTable.putToParameters("paramToUpdate", "updated"); sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable); Partition partition = sourceCatalog .client() .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China"); partition.putToParameters("partition_paramToUpdate", "updated"); sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition); exit.expectSystemExitWithStatus(0); File config = dataFolder.getFile("partitioned-single-table-mirror.yml"); CircusTrainRunner runner = CircusTrainRunner .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation) .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(), sourceCatalog.driverClassName()) .replicaMetaStore(replicaCatalog.getThriftConnectionUri()) .build(); exit.checkAssertionAfterwards(new Assertion() { @Override public void checkAssertion() throws Exception { Table hiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE); assertThat(hiveTable.getDbName(), is(DATABASE)); assertThat(hiveTable.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE)); // MIRRORED table should be set to EXTERNAL assertThat(isExternalTable(hiveTable), is(true)); assertThat(hiveTable.getSd().getCols(), is(DATA_COLUMNS)); assertThat(hiveTable.getParameters().get("paramToUpdate"), is("updated")); File sameAsSourceLocation = new File(sourceWarehouseUri, DATABASE + "/" + SOURCE_MANAGED_PARTITIONED_TABLE); 
assertThat(hiveTable.getSd().getLocation() + "/", is(sameAsSourceLocation.toURI().toString())); List<Partition> listPartitions = replicaCatalog .client() .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50); assertThat(listPartitions.size(), is(2)); assertThat(listPartitions.get(0).getSd().getLocation(), is(sameAsSourceLocation.toURI().toString() + "continent=Asia/country=China")); assertThat(listPartitions.get(0).getParameters().get("partition_paramToUpdate"), is("updated")); assertThat(listPartitions.get(1).getSd().getLocation(), is(sameAsSourceLocation.toURI().toString() + "continent=Europe/country=UK")); } }); runner.run(config.getAbsolutePath()); }
Example 7
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
@Test public void partitionedTableMetadataUpdate() throws Exception { helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE)); // creating replicaTable final URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE); TestUtils .createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaLocation); Table table = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE); table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID"); URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia"); final URI partitionChina = URI.create(partitionAsia + "/country=China"); replicaCatalog .client() .add_partitions(Arrays.asList(newTablePartition(table, Arrays.asList("Asia", "China"), partitionChina))); replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table); // adjusting the sourceTable, mimicking the change we want to update Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE); sourceTable.putToParameters("paramToUpdate", "updated"); sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable); Partition partition = sourceCatalog .client() .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China"); partition.putToParameters("partition_paramToUpdate", "updated"); sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition); exit.expectSystemExitWithStatus(0); File config = dataFolder.getFile("partitioned-single-table-metadata-update.yml"); CircusTrainRunner runner = CircusTrainRunner .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation) .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), 
sourceCatalog.connectionURL(), sourceCatalog.driverClassName()) .replicaMetaStore(replicaCatalog.getThriftConnectionUri()) .build(); exit.checkAssertionAfterwards(new Assertion() { @Override public void checkAssertion() throws Exception { Table hiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE); assertThat(hiveTable.getDbName(), is(DATABASE)); assertThat(hiveTable.getTableName(), is(TARGET_PARTITIONED_MANAGED_TABLE)); // dummyEventID should be overridden assertThat(hiveTable.getParameters().get(REPLICATION_EVENT.parameterName()), startsWith("ctp-")); assertThat(hiveTable.getParameters().get("paramToUpdate"), is("updated")); assertThat(isExternalTable(hiveTable), is(true)); assertThat(hiveTable.getSd().getCols(), is(DATA_COLUMNS)); assertThat(hiveTable.getSd().getLocation(), is(replicaLocation.toString())); List<Partition> listPartitions = replicaCatalog .client() .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50); assertThat(listPartitions.size(), is(1)); // Only previously replicated partitions are updated, no NEW partitions are created assertThat(listPartitions.get(0).getSd().getLocation(), is(partitionChina.toString())); assertThat(listPartitions.get(0).getParameters().get("partition_paramToUpdate"), is("updated")); } }); runner.run(config.getAbsolutePath()); }
Example 8
Source File: CircusTrainHdfsHdfsIntegrationTest.java From circus-train with Apache License 2.0 | 4 votes |
/**
 * Writes an Avro schema file into the source warehouse, references it through
 * {@code avro.schema.url} on both the source table and one source partition, runs Circus Train with
 * {@code partitioned-single-table-avro-schema-metadata-update.yml}, and - after the expected exit
 * with status 0 - checks that the replica table and its first listed partition reference a schema
 * URL that starts with the replica warehouse's {@code ct_database/} location.
 */
@Test
public void partitionedTableMetadataUpdateAvroSchema() throws Exception {
  helper.createManagedPartitionedTable(toUri(sourceWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));
  LOG.info(">>>> Table {} ", sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE));

  java.nio.file.Path sourceAvroSchemaPath = Paths.get(sourceWarehouseUri.toString() + "/avro-schema-file.test");
  // Encode explicitly as UTF-8 so the written schema does not depend on the platform default charset.
  Files.write(sourceAvroSchemaPath, AVRO_SCHEMA_CONTENT.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  String avroSchemaUrl = sourceAvroSchemaPath.toString();

  // Pre-create the replica table with a single partition under a dummy event folder.
  URI replicaLocation = toUri(replicaWarehouseUri, DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  TestUtils
      .createPartitionedTable(replicaCatalog.client(), DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, replicaLocation);
  Table table = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
  table.putToParameters(REPLICATION_EVENT.parameterName(), "dummyEventID");
  URI partitionAsia = URI.create(replicaLocation + "/dummyEventID/continent=Asia");
  URI partitionChina = URI.create(partitionAsia + "/country=China");
  replicaCatalog
      .client()
      .add_partitions(Arrays.asList(newTablePartition(table, Arrays.asList("Asia", "China"), partitionChina)));
  replicaCatalog.client().alter_table(table.getDbName(), table.getTableName(), table);

  // Point the source table and one of its partitions at the schema file written above.
  Table sourceTable = sourceCatalog.client().getTable(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE);
  sourceTable.putToParameters("avro.schema.url", avroSchemaUrl);
  sourceCatalog.client().alter_table(sourceTable.getDbName(), sourceTable.getTableName(), sourceTable);
  Partition partition = sourceCatalog
      .client()
      .getPartition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, "continent=Asia/country=China");
  partition.putToParameters("avro.schema.url", avroSchemaUrl);
  sourceCatalog.client().alter_partition(DATABASE, SOURCE_MANAGED_PARTITIONED_TABLE, partition);

  exit.expectSystemExitWithStatus(0);
  File config = dataFolder.getFile("partitioned-single-table-avro-schema-metadata-update.yml");
  CircusTrainRunner runner = CircusTrainRunner
      .builder(DATABASE, sourceWarehouseUri, replicaWarehouseUri, housekeepingDbLocation)
      .sourceMetaStore(sourceCatalog.getThriftConnectionUri(), sourceCatalog.connectionURL(),
          sourceCatalog.driverClassName())
      .replicaMetaStore(replicaCatalog.getThriftConnectionUri())
      .build();

  exit.checkAssertionAfterwards(new Assertion() {
    @Override
    public void checkAssertion() throws Exception {
      Table replicaHiveTable = replicaCatalog.client().getTable(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE);
      String expectedReplicaSchemaUrl = replicaWarehouseUri.toURI().toString() + "ct_database/";
      String transformedAvroUrl = replicaHiveTable.getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));

      List<Partition> listPartitions = replicaCatalog
          .client()
          .listPartitions(DATABASE, TARGET_PARTITIONED_MANAGED_TABLE, (short) 50);
      transformedAvroUrl = listPartitions.get(0).getParameters().get("avro.schema.url");
      assertThat(transformedAvroUrl, startsWith(expectedReplicaSchemaUrl));
    }
  });

  runner.run(config.getAbsolutePath());
}