org.apache.hadoop.hive.ql.metadata.Partition Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.metadata.Partition.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RegistrationTimeSkipPredicateTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Builds a mocked {@link HivePartitionFileSet} for the skip-predicate tests.
 *
 * @param location data location reported by the mocked source partition
 * @param registrationGenerationTime value stored, as a decimal string, under
 *        {@link HiveDataset#REGISTRATION_GENERATION_TIME_MILLIS} in the mocked target partition's parameters
 * @param targetPartitionExists when {@code false} the file set reports an absent existing target partition
 * @return the mocked file set
 */
public HivePartitionFileSet createPartitionCopy(Path location, long registrationGenerationTime,
    boolean targetPartitionExists) {
  Partition sourcePartition = Mockito.mock(Partition.class);
  Mockito.doReturn(location).when(sourcePartition).getDataLocation();

  HivePartitionFileSet fileSet = Mockito.mock(HivePartitionFileSet.class);
  Mockito.doReturn(sourcePartition).when(fileSet).getPartition();

  if (!targetPartitionExists) {
    Mockito.doReturn(Optional.absent()).when(fileSet).getExistingTargetPartition();
    return fileSet;
  }

  // The existing target partition only needs to expose its registration time parameter.
  Map<String, String> targetParameters = Maps.newHashMap();
  targetParameters.put(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS, Long.toString(registrationGenerationTime));

  Partition existingTarget = Mockito.mock(Partition.class);
  Mockito.doReturn(targetParameters).when(existingTarget).getParameters();
  Mockito.doReturn(Optional.of(existingTarget)).when(fileSet).getExistingTargetPartition();
  return fileSet;
}
Example #2
Source File: HiveSource.java From incubator-gobblin with Apache License 2.0 | 6 votes |
@VisibleForTesting public static long getCreateTime(Partition partition) { // If create time is set, use it. // .. this is always set if HiveJDBC or Hive mestastore is used to create partition. // .. it might not be set (ie. equals 0) if Thrift API call is used to create partition. if (partition.getTPartition().getCreateTime() > 0) { return TimeUnit.MILLISECONDS.convert(partition.getTPartition().getCreateTime(), TimeUnit.SECONDS); } // Try to use distcp-ng registration generation time if it is available else if (partition.getTPartition().isSetParameters() && partition.getTPartition().getParameters().containsKey(DISTCP_REGISTRATION_GENERATION_TIME_KEY)) { log.debug("Did not find createTime in Hive partition, used distcp registration generation time."); return Long.parseLong(partition.getTPartition().getParameters().get(DISTCP_REGISTRATION_GENERATION_TIME_KEY)); } else { log.warn(String.format("Could not find create time for partition %s. Will return createTime as 0", partition.getCompleteName())); return 0; } }
Example #3
Source File: DatePartitionedHiveVersionFinderTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Verifies that a user supplied partition key name and date-time pattern are honoured when a dataset
 * version is derived from a partition.
 */
@Test
public void testUserDefinedDatePattern() throws Exception {
  String tableName = "VfTb2";
  String partitionKey = "field1";
  String partitionValue = "2016/01/01/20";

  Config conf = ConfigFactory.parseMap(ImmutableMap.<String, String> of(
      DatePartitionHiveVersionFinder.PARTITION_KEY_NAME_KEY, partitionKey,
      DatePartitionHiveVersionFinder.PARTITION_VALUE_DATE_TIME_PATTERN_KEY, "yyyy/MM/dd/HH"));
  DatePartitionHiveVersionFinder versionFinder = new DatePartitionHiveVersionFinder(this.fs, conf);

  Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, ImmutableList.of(partitionKey));
  org.apache.hadoop.hive.metastore.api.Partition tp = this.hiveMetastoreTestUtils
      .addTestPartition(tbl, ImmutableList.of(partitionValue), (int) System.currentTimeMillis());
  Partition partition = new Partition(new org.apache.hadoop.hive.ql.metadata.Table(tbl), tp);

  // The partition name is URL-encoded, so decode before comparing.
  String decodedName = URLDecoder.decode(partition.getName(), "UTF-8");
  Assert.assertEquals(decodedName, "field1=2016/01/01/20");

  TimestampedHiveDatasetVersion dv = versionFinder.getDatasetVersion(partition);
  Assert.assertEquals(dv.getDateTime(), formatter.parseDateTime(partitionValue));
}
Example #4
Source File: DatePartitionHiveVersionFinder.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition}. The hive table is expected
 * to be date partitioned by {@link #partitionKeyName}. The partition value format must be {@link #pattern}.
 *
 * <p>Only the leading part of the trimmed partition value, at most as long as the {@link #pattern} string,
 * is handed to the formatter.
 *
 * @throws IllegalArgumentException when {@link #partitionKeyName} is not found in the partition keys of the
 *         partition's table
 * @throws IllegalArgumentException when a value can not be found for {@link #partitionKeyName} in the <code>partition</code>
 * @throws IllegalArgumentException if the partition value can not be parsed with {@link #pattern}
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {
  int index = Iterables.indexOf(partition.getTable().getPartitionKeys(), this.partitionKeyNamePredicate);

  if (index == -1) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition key %s in the table %s", this.partitionKeyName,
            partition.getTable().getCompleteName()));
  }

  if (index >= partition.getValues().size()) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition value for key %s in the partition %s", this.partitionKeyName,
            partition.getName()));
  }

  String partitionValue = partition.getValues().get(index).trim();
  // Clamp the prefix length: a value shorter than the pattern string (for example when the pattern contains
  // quoted literals) must be rejected or accepted by the formatter, not blow up in substring with a
  // StringIndexOutOfBoundsException.
  int prefixLength = Math.min(partitionValue.length(), this.pattern.length());
  String datePortion = partitionValue.substring(0, prefixLength);

  return new TimestampedHiveDatasetVersion(this.formatter.parseDateTime(datePortion), partition);
}
Example #5
Source File: HivePartitionVersionFinder.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Collects, as the login user, the partitions of every known table whose name contains one of the
 * configured patterns and adds the ones matching {@code name} to {@code versions}.
 *
 * @param name partition name handed to {@code addPartitionsToVersions}
 * @param state not read by this method
 * @throws IOException if the login user cannot be resolved, if the privileged action fails, or if the
 *         calling thread is interrupted while the action runs
 */
private void setVersions(final String name, final State state)
    throws IOException {
  try {
    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
    loginUser.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run()
          throws IOException {
        synchronized (lock) {
          // NOTE(review): a table name matching several patterns is processed once per matching pattern;
          // confirm whether that duplication is intended.
          for (String tableName : ComplianceRetentionJob.tableNamesList) {
            for (String pattern : patterns) {
              if (tableName.contains(pattern)) {
                addPartitionsToVersions(versions, name, getPartitions(tableName));
              }
            }
          }
        }
        return null;
      }
    });
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
  // IOExceptions propagate unchanged instead of being wrapped in a second IOException.
}
Example #6
Source File: HivePartitionVersionFinder.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Looks up all partitions of a table.
 *
 * @param completeTableName table name that {@code At_SPLITTER} must split into exactly two parts, which are
 *        passed to the metastore client as the two arguments of {@code getTable}
 * @return the partitions of the table; an empty list when the name does not split into two parts or when the
 *         lookup fails (both cases are logged as warnings)
 */
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    HiveDataset dataset = new HiveDataset(FileSystem.newInstance(new Configuration()), ComplianceRetentionJob.pool,
        new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    // Best effort: keep going, but pass the exception along so the stack trace is not lost.
    log.warn("Unable to get Partitions for table " + completeTableName + " " + e.getMessage(), e);
  }
  return Collections.emptyList();
}
Example #7
Source File: HdfsModifiedTimeHiveVersionFinder.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition} based on the modification time of
 * the underlying hdfs data location.
 *
 * @throws IllegalArgumentException when argument is null
 * @throws IllegalArgumentException when data location of partition is null
 * @throws IllegalArgumentException when data location of partition doesn't exist
 * @throws RuntimeException wrapping any {@link IOException} raised while querying the file system
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {
  try {
    // The original message stopped mid-sentence ("Argument to method "); say what is actually wrong.
    Preconditions.checkArgument(partition != null, "Argument to method getDatasetVersion must not be null");

    Path dataLocation = partition.getDataLocation();
    Preconditions
        .checkArgument(dataLocation != null, "Data location is null for partition " + partition.getCompleteName());

    boolean exists = this.fs.exists(dataLocation);
    Preconditions.checkArgument(exists, "Data location doesn't exist for partition " + partition.getCompleteName());

    long modificationTS = this.fs.getFileStatus(dataLocation).getModificationTime();
    return new TimestampedHiveDatasetVersion(new DateTime(modificationTS), partition);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #8
Source File: HiveAvroCopyEntityHelper.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Rewrites the {@link #HIVE_TABLE_AVRO_SCHEMA_URL} serde parameter of a storage descriptor so that it points
 * at the target location, when the current schema URL lives on the source file system.
 *
 * <p>The URL is considered to be on the source file system when it is absolute with neither scheme nor
 * authority, or when both its scheme and authority equal those of the source file system URI. Nothing is
 * changed when the parameter is not set or the URL points elsewhere.
 *
 * @param entity name of the entity to be changed, e.g. hive table or partition; only used for logging
 * @param sd StorageDescriptor of the entity
 * @param hiveHelper supplies the source dataset's file system, the target file system and the target path helper
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL == null) {
    return;
  }

  Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
  URI oldAvroSchemaURI = oldAvroSchemaPath.toUri();
  URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

  // Scheme and authority may each be null (e.g. "file:///schema.avsc" has no authority, a relative path has
  // no scheme), so compare them null-safely instead of dereferencing them.
  boolean onSourceFileSystem = PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
      || (java.util.Objects.equals(oldAvroSchemaURI.getScheme(), sourceFileSystemURI.getScheme())
          && java.util.Objects.equals(oldAvroSchemaURI.getAuthority(), sourceFileSystemURI.getAuthority()));
  if (!onSourceFileSystem) {
    return;
  }

  String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath,
      hiveHelper.getTargetFileSystem(), Optional.<Partition>absent(), true).toString();

  sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
  log.info(String.format("For entity %s, change %s from %s to %s", entity,
      HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
}
Example #9
Source File: PartitionLevelWatermarkerTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
@Test public void testDroppedPartitions() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus .setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100l, "2015-02", 101l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state); Table table = mockTable("test_dataset_urn"); Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(new FieldSchema("year", "string", ""))); Partition partition2015 = mockPartition(table, ImmutableList.of("2015")); // partition 2015 replaces 2015-01 and 2015-02 Mockito.when(partition2015.getParameters()).thenReturn( ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02")); watermarker.onPartitionProcessBegin(partition2015, 0l, 0l); Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"), ImmutableMap.of("2015", 0l)); }
Example #10
Source File: AbstractAvroToOrcConverter.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/** * Parse the {@link #REPLACED_PARTITIONS_HIVE_METASTORE_KEY} from partition parameters to returns DDLs for all the partitions to be * dropped. * * @return A {@link List} of partitions to be dropped. Each element of the list is a {@link Map} which maps a partition's * key and value. * */ public static List<Map<String, String>> getDropPartitionsDDLInfo(Partition hivePartition) { List<Map<String, String>> replacedPartitionsDDLInfo = Lists.newArrayList(); List<FieldSchema> partitionKeys = hivePartition.getTable().getPartitionKeys(); if (StringUtils.isNotBlank(hivePartition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY))) { // Partitions are separated by "|" for (String partitionsInfoString : Splitter.on("|").omitEmptyStrings().split(hivePartition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY))) { // Values for a partition are separated by "," List<String> partitionValues = Splitter.on(",").omitEmptyStrings().trimResults().splitToList(partitionsInfoString); // Do not drop the partition being processed. Sometimes a partition may have replaced another partition of the same values. if (!partitionValues.equals(hivePartition.getValues())) { ImmutableMap.Builder<String, String> partitionDDLInfoMap = ImmutableMap.builder(); for (int i = 0; i < partitionKeys.size(); i++) { partitionDDLInfoMap.put(partitionKeys.get(i).getName(), partitionValues.get(i)); } replacedPartitionsDDLInfo.add(partitionDDLInfoMap.build()); } } } return replacedPartitionsDDLInfo; }
Example #11
Source File: TestSentryHiveAuthorizationTaskFactory.java From incubator-sentry with Apache License 2.0 | 6 votes |
/**
 * Prepares the Hive configuration, session, mocked {@link Hive} database and analyzer shared by the tests.
 * Statement order is kept as is because the session is started against the configuration in between
 * configuration changes.
 */
@Before
public void setup() throws Exception {
  conf = new HiveConf();

  // Scratch directory for this test run, made writable for everybody.
  baseDir = Files.createTempDir();
  baseDir.setWritable(true, false);
  String scratchDirPath = baseDir.getAbsolutePath();
  conf.setVar(HiveConf.ConfVars.SCRATCHDIR, scratchDirPath);
  SessionState.start(conf);

  String taskFactoryClassName = SentryHiveAuthorizationTaskFactoryImpl.class.getName();
  conf.setVar(ConfVars.HIVE_AUTHORIZATION_TASK_FACTORY, taskFactoryClassName);

  db = Mockito.mock(Hive.class);
  table = new Table(DB, TABLE);
  partition = new Partition(table);
  context = new Context(conf);
  parseDriver = new ParseDriver();
  analyzer = new DDLSemanticAnalyzer(conf, db);
  SessionState.start(conf);

  // The mocked database hands back the fixture table and partition.
  Mockito.when(db.getTable(TABLE, false)).thenReturn(table);
  HashMap<String, String> emptyPartitionSpec = new HashMap<String, String>();
  Mockito.when(db.getPartition(table, emptyPartitionSpec, false)).thenReturn(partition);

  HadoopDefaultAuthenticator authenticator = new HadoopDefaultAuthenticator();
  authenticator.setConf(conf);
  currentUser = authenticator.getUserName();
}
Example #12
Source File: HivePartitionFinder.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Wraps every partition of every configured {@link HiveDataset} in a {@link HivePartitionDataset} and
 * returns the subset chosen by the selection policy configured under
 * {@link ComplianceConfigurationKeys#DATASET_SELECTION_POLICY_CLASS}.
 *
 * <p>NOTE(review): the previous description said backup, trash and staging tables are excluded; that
 * filtering is not visible in this method and is presumably done by the dataset list or the policy.
 */
@Override
public List<HivePartitionDataset> findDatasets()
    throws IOException {
  List<HivePartitionDataset> partitionDatasets = new ArrayList<>();
  for (HiveDataset dataset : this.hiveDatasets) {
    List<Partition> datasetPartitions = dataset.getPartitionsFromDataset();
    for (Partition datasetPartition : datasetPartitions) {
      partitionDatasets.add(new HivePartitionDataset(datasetPartition));
    }
  }

  String policyClassName = this.state.getProp(ComplianceConfigurationKeys.DATASET_SELECTION_POLICY_CLASS,
      ComplianceConfigurationKeys.DEFAULT_DATASET_SELECTION_POLICY_CLASS);
  Policy<HivePartitionDataset> policy = GobblinConstructorUtils.invokeConstructor(Policy.class, policyClassName);
  return policy.selectedList(partitionDatasets);
}
Example #13
Source File: PartitionLevelWatermarkerTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Verifies that the watermark stored per partition in the previous state is returned for the matching
 * partition.
 */
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
  // Previous state: one watermark per partition value.
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Partition partition2016 = mockPartition(table, ImmutableList.of("2016"));

  LongWatermark watermark2015 = watermarker.getPreviousHighWatermark(partition2015);
  Assert.assertEquals(watermark2015, new LongWatermark(100L));

  LongWatermark watermark2016 = watermarker.getPreviousHighWatermark(partition2016);
  Assert.assertEquals(watermark2016, new LongWatermark(101L));
}
Example #14
Source File: HiveDataset.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Fetches all partitions of this dataset's table (no filter) through a pooled metastore client and returns
 * them after passing them through {@code sortPartitions}.
 *
 * @return the sorted partitions
 * @throws IOException if the partitions cannot be fetched
 */
public List<Partition> getPartitionsFromDataset() throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> metastoreClient = getClientPool().getClient()) {
    return sortPartitions(HiveUtils.getPartitions(metastoreClient.get(), getTable(), Optional.<String>absent()));
  }
}
Example #15
Source File: HivePartitionVersionFinder.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Appends a {@link HivePartitionRetentionVersion} to {@code versions} for every partition whose name equals
 * {@code name}, ignoring case.
 *
 * @param versions list to append to
 * @param name partition name to match
 * @param partitions candidate partitions
 */
private void addPartitionsToVersions(List<HivePartitionVersion> versions, String name,
    List<Partition> partitions)
    throws IOException {
  for (Partition candidate : partitions) {
    if (!candidate.getName().equalsIgnoreCase(name)) {
      continue;
    }
    versions.add(new HivePartitionRetentionVersion(candidate));
  }
}
Example #16
Source File: HiveAvroCopyEntityHelper.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Updates the {@link #HIVE_TABLE_AVRO_SCHEMA_URL} location of every source partition when the target table
 * is an Avro table; does nothing otherwise.
 *
 * @param targetTable new Table to be registered in hive
 * @param sourcePartitions source partitions, keyed by a list of strings (the key is not read here)
 * @param hiveHelper passed through to {@code updateAvroSchemaURL}
 * @throws IOException declared for callers; not thrown directly by this method
 */
public static void updatePartitionAttributesIfAvro(Table targetTable, Map<List<String>, Partition> sourcePartitions,
    HiveCopyEntityHelper hiveHelper) throws IOException {
  if (!isHiveTableAvroType(targetTable)) {
    return;
  }
  // Only the partitions themselves matter here, not the keys they are indexed by.
  for (Partition sourcePartition : sourcePartitions.values()) {
    updateAvroSchemaURL(sourcePartition.getCompleteName(), sourcePartition.getTPartition().getSd(), hiveHelper);
  }
}
Example #17
Source File: HiveTargetPathHelperTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * With data file relocation enabled and a target table root configured, a file of a partitioned table is
 * expected under the target root, table name and partition values.
 */
@Test
public void testRelocateFilesPartitioned() {
  Properties properties = new Properties();
  properties.setProperty(HiveTargetPathHelper.RELOCATE_DATA_FILES_KEY, Boolean.toString(true));
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_ROOT, "/target");
  HiveTargetPathHelper helper = createTestTargetPathHelper(properties);

  Partition partition = Mockito.mock(Partition.class);
  Mockito.when(partition.getValues()).thenReturn(Lists.newArrayList("part", "123"));

  Path source = new Path(TABLE_ROOT, "partition/file1");
  Path actualTarget = helper.getTargetPath(source, this.fs, Optional.of(partition), true);

  Assert.assertEquals(actualTarget, new Path("/target/tableName/part/123/file1"));
}
Example #18
Source File: ComplianceRetentionJob.java From incubator-gobblin with Apache License 2.0 | 5 votes |
public void initDatasetFinder(Properties properties) throws IOException { Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS), "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties)); Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator(); while (datasetsIterator.hasNext()) { // Drop partitions from empty tables if property is set, otherwise skip the table HiveDataset hiveDataset = datasetsIterator.next(); List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset(); String completeTableName = hiveDataset.getTable().getCompleteName(); if (!partitionsFromDataset.isEmpty()) { this.tableNamesList.add(completeTableName); continue; } if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES, ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) { continue; } if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName .contains(ComplianceConfigurationKeys.STAGING)) { this.tablesToDrop.add(hiveDataset); } } }
Example #19
Source File: HiveMetaStoreBridgeTest.java From atlas with Apache License 2.0 | 5 votes |
/**
 * Verifies that importing Hive metadata does not fail when a partition reports no partition values.
 */
@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
  // The same GUID is used for the table entity, the process entity and the ext-info lookup.
  final String entityGuid = "82e06b34-9151-4023-aa9d-b82103a50e77";

  setupDB(hiveClient, TEST_DB_NAME);
  List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
  Table hiveTable = hiveTables.get(0);

  returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE);

  when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
      Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
          HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME))))
      .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
          getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, entityGuid)));

  String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, hiveTable);

  when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
      Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQualifiedName)))
      .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
          getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, entityGuid)));

  when(atlasEntityWithExtInfo.getEntity(entityGuid))
      .thenReturn(createTableReference());

  Partition partition = mock(Partition.class);
  when(partition.getTable()).thenReturn(hiveTable);
  // Typed empty list instead of a raw List, so the stubbing is checked by the compiler.
  List<String> partitionValues = Arrays.asList(new String[0]);
  when(partition.getValues()).thenReturn(partitionValues);

  when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(partition));

  HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2);
  try {
    bridge.importHiveMetadata(null, null, true);
  } catch (Exception e) {
    Assert.fail("Partition with null key caused import to fail with exception ", e);
  }
}
Example #20
Source File: PartitionLevelWatermarker.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/** * Adds an expected high watermark for this {@link Partition}. Also removes any watermarks for partitions being replaced. * Replace partitions are read using partition parameter {@link AbstractAvroToOrcConverter#REPLACED_PARTITIONS_HIVE_METASTORE_KEY}. * Uses the <code>partitionUpdateTime</code> as the high watermark for this <code>partition</code> * * {@inheritDoc} * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#onPartitionProcessBegin(org.apache.hadoop.hive.ql.metadata.Partition, long, long) */ @Override public void onPartitionProcessBegin(Partition partition, long partitionProcessTime, long partitionUpdateTime) { Preconditions.checkNotNull(partition); Preconditions.checkNotNull(partition.getTable()); if (!this.expectedHighWatermarks.hasPartitionWatermarks(tableKey(partition.getTable()))) { throw new IllegalStateException(String.format( "onPartitionProcessBegin called before onTableProcessBegin for table: %s, partitions: %s", tableKey(partition.getTable()), partitionKey(partition))); } // Remove dropped partitions Collection<String> droppedPartitions = Collections2.transform(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(partition), new Function<Map<String, String>, String>() { @Override public String apply(Map<String, String> input) { return PARTITION_VALUES_JOINER.join(input.values()); } }); this.expectedHighWatermarks.removePartitionWatermarks(tableKey(partition.getTable()), droppedPartitions); this.expectedHighWatermarks.addPartitionWatermark(tableKey(partition.getTable()), partitionKey(partition), partitionUpdateTime); }
Example #21
Source File: PartitionLevelWatermarker.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/** * Return the previous high watermark if found in previous state. Else returns 0 * {@inheritDoc} * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getPreviousHighWatermark(org.apache.hadoop.hive.ql.metadata.Partition) */ @Override public LongWatermark getPreviousHighWatermark(Partition partition) { if (this.previousWatermarks.hasPartitionWatermarks(tableKey(partition.getTable()))) { // If partition has a watermark return. if (this.previousWatermarks.get(tableKey(partition.getTable())).containsKey(partitionKey(partition))) { return new LongWatermark(this.previousWatermarks.getPartitionWatermark(tableKey(partition.getTable()), partitionKey(partition))); } } return new LongWatermark(0); }
Example #22
Source File: HiveTargetPathHelperTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * With a prefix replacement configured, the configured prefix of the source path is expected to be swapped
 * for the replacement in the target path.
 */
@Test
public void testReplacePrefix() {
  Properties properties = new Properties();
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, "/table");
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_REPLACEMENT, "/replaced");
  HiveTargetPathHelper helper = createTestTargetPathHelper(properties);

  Path source = new Path(TABLE_ROOT, "partition/file1");
  Path actualTarget = helper.getTargetPath(source, this.fs, Optional.<Partition>absent(), true);

  Assert.assertEquals(actualTarget, new Path("/replaced/path/partition/file1"));
}
Example #23
Source File: HiveUtils.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Fetches the partitions of a table and indexes them by their partition values.
 *
 * @param client an {@link IMetaStoreClient} for the correct metastore.
 * @param table the {@link Table} for which we should get partitions.
 * @param filter an optional filter for partitions as would be used in Hive. Can only filter on String columns
 *        (e.g. "part = \"part1\"" or "date > \"2015\"").
 * @param hivePartitionExtendedFilterOptional an optional extended filter, passed through unchanged to
 *        {@code getPartitions}
 * @return a map of partition values to {@link Partition} for the input {@link Table}, after filtering.
 */
public static Map<List<String>, Partition> getPartitionsMap(IMetaStoreClient client, Table table,
    Optional<String> filter, Optional<? extends HivePartitionExtendedFilter> hivePartitionExtendedFilterOptional)
    throws IOException {
  // Key extractor: a partition is indexed by its list of values.
  Function<Partition, List<String>> partitionValues = new Function<Partition, List<String>>() {
    @Override
    public List<String> apply(@Nullable Partition partition) {
      return partition == null ? null : partition.getValues();
    }
  };
  return Maps.uniqueIndex(getPartitions(client, table, filter, hivePartitionExtendedFilterOptional),
      partitionValues);
}
Example #24
Source File: BackfillHiveSource.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Override public boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark) { // If a whitelist is provided only create workunits for those partitions if (!this.partitionsWhitelist.isEmpty()) { return this.partitionsWhitelist.contains(sourcePartition.getCompleteName()); } // If no whitelist is set, all partitions of a dataset are backfilled return true; }
Example #25
Source File: BackfillHiveSource.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Override public boolean isOlderThanLookback(Partition partition) { // If partition whitelist is provided, ignore lookback if (!this.partitionsWhitelist.isEmpty()) { return false; } else { return super.isOlderThanLookback(partition); } }
Example #26
Source File: DatePatternUpdateProviderTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Creates a mocked {@link Partition} whose Thrift partition exposes a storage descriptor with the given
 * location. All mocks answer unstubbed calls with Mockito's smart nulls.
 *
 * @param location location returned by the mocked storage descriptor
 * @return the mocked partition
 */
public static Partition createMockPartitionWithLocation(String location) {
  // Build the chain inside-out: storage descriptor, then Thrift partition, then the ql partition.
  StorageDescriptor storageDescriptor = Mockito.mock(StorageDescriptor.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(storageDescriptor.getLocation()).thenReturn(location);

  org.apache.hadoop.hive.metastore.api.Partition thriftPartition =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(thriftPartition.getSd()).thenReturn(storageDescriptor);

  Partition partition = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(partition.getTPartition()).thenReturn(thriftPartition);
  return partition;
}
Example #27
Source File: HiveSource.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Creates the {@link HiveWorkUnit} for one partition of a dataset.
 *
 * <p>The table and partition schema URLs are set on the workunit when {@code disableAvroCheck} is true or
 * the dataset's table is reported as Avro by {@code isAvro}.
 *
 * @param hiveDataset dataset owning the partition
 * @param partition partition the workunit is created for
 * @param disableAvroCheck when true the schema URLs are set without checking the table type
 * @return the new workunit
 * @throws IOException declared for callers; presumably raised by the schema manager — confirm
 */
protected HiveWorkUnit workUnitForPartition(HiveDataset hiveDataset, Partition partition, boolean disableAvroCheck)
    throws IOException {
  HiveWorkUnit workUnit = new HiveWorkUnit(hiveDataset, partition);

  boolean setSchemaUrls = disableAvroCheck || isAvro(hiveDataset.getTable());
  if (!setSchemaUrls) {
    return workUnit;
  }

  workUnit.setTableSchemaUrl(this.avroSchemaManager.getSchemaUrl(hiveDataset.getTable()));
  workUnit.setPartitionSchemaUrl(this.avroSchemaManager.getSchemaUrl(partition));
  return workUnit;
}
Example #28
Source File: HiveTargetPathHelperTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Without any relocation or prefix properties, the target path is expected to equal the source path.
 */
@Test
public void testReplicatePaths() {
  HiveTargetPathHelper helper = createTestTargetPathHelper(new Properties());

  Path source = new Path(TABLE_ROOT, "partition/file1");
  Path actualTarget = helper.getTargetPath(source, this.fs, Optional.<Partition>absent(), true);

  Assert.assertEquals(actualTarget, new Path(TABLE_ROOT, "partition/file1"));
}
Example #29
Source File: EventWorkunitUtils.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/** * Set SLA event metadata in the workunit. The publisher will use this metadta to publish sla events */ public static void setPartitionSlaEventMetadata(WorkUnit state, Table table, Partition partition, long updateTime, long lowWatermark, long beginGetWorkunitsTime) { state.setProp(SlaEventKeys.DATASET_URN_KEY, state.getProp(ConfigurationKeys.DATASET_URN_KEY)); state.setProp(SlaEventKeys.PARTITION_KEY, partition.getName()); state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, String.valueOf(updateTime)); // Time when the workunit was created state.setProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY, System.currentTimeMillis()); state.setProp(EventConstants.WORK_UNIT_CREATE_TIME, state.getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY)); state.setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY, lowWatermark); state.setProp(EventConstants.BEGIN_GET_WORKUNITS_TIME, beginGetWorkunitsTime); state.setProp(EventConstants.SOURCE_DATA_LOCATION, partition.getDataLocation()); }
Example #30
Source File: HdfsBasedUpdateProvider.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Get the update time of a {@link Partition}, delegating to the path based overload with the partition's
 * data location.
 *
 * @return the update time reported for the data location
 * @throws UpdateNotFoundException when the lookup fails with an {@link IOException}
 *
 * {@inheritDoc}
 * @see HiveUnitUpdateProvider#getUpdateTime(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public long getUpdateTime(Partition partition)
    throws UpdateNotFoundException {
  long updateTime;
  try {
    updateTime = getUpdateTime(partition.getDataLocation());
  } catch (IOException e) {
    String message = String.format("Failed to get update time for %s", partition.getCompleteName());
    throw new UpdateNotFoundException(message, e);
  }
  return updateTime;
}