org.apache.hadoop.hive.metastore.Warehouse Java Examples
The following examples show how to use
org.apache.hadoop.hive.metastore.Warehouse.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ViewTransformation.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Rewrites the HQL stored on a view; anything that is not a view is returned as-is.
 *
 * @param table the table or view to transform
 * @return the input table when it is not a view, otherwise a new {@code Table}
 *         carrying the translated original and expanded view text
 */
@Override
public Table transform(Table table) {
    // Only views carry HQL text that needs translating.
    if (!MetaStoreUtils.isView(table)) {
        return table;
    }

    LOG.info("Translating HQL of view {}.{}", table.getDbName(), table.getTableName());
    String qualifiedViewName = Warehouse.getQualifiedName(table);
    String translatedOriginal = hqlTranslator.translate(qualifiedViewName, table.getViewOriginalText());
    String translatedExpanded = hqlTranslator.translate(qualifiedViewName, table.getViewExpandedText());

    // The translated text is set on a freshly constructed Table, not on the argument.
    Table result = new Table(table);
    result.setViewOriginalText(translatedOriginal);
    result.setViewExpandedText(translatedExpanded);

    boolean skipExistenceChecks = replicaHiveConf.getBoolean(SKIP_TABLE_EXIST_CHECKS, false);
    if (skipExistenceChecks) {
        return result;
    }

    LOG.info("Validating that tables used by the view {}.{} exist in the replica catalog",
        table.getDbName(), table.getTableName());
    validateReferencedTables(result);
    return result;
}
Example #2
Source File: AWSCatalogMetastoreClient.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
@Override public org.apache.hadoop.hive.metastore.api.Partition getPartitionWithAuthInfo( String databaseName, String tableName, List<String> values, String userName, List<String> groupNames) throws MetaException, UnknownTableException, NoSuchObjectException, TException { // TODO move this into the service org.apache.hadoop.hive.metastore.api.Partition partition = getPartition(databaseName, tableName, values); org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName); if ("TRUE".equalsIgnoreCase(table.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { String partName = Warehouse.makePartName(table.getPartitionKeys(), values); HiveObjectRef obj = new HiveObjectRef(); obj.setObjectType(HiveObjectType.PARTITION); obj.setDbName(databaseName); obj.setObjectName(tableName); obj.setPartValues(values); org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privilegeSet = this.get_privilege_set(obj, userName, groupNames); partition.setPrivileges(privilegeSet); } return partition; }
Example #3
Source File: HiveConnectorFastServiceConfig.java From metacat with Apache License 2.0 | 6 votes |
/**
 * Builds the fast partition service bean for the hive connector.
 *
 * @param metacatHiveClient      hive client
 * @param warehouse              hive warehouse
 * @param hiveMetacatConverter   metacat converter
 * @param connectorContext       connector config
 * @param directSqlGetPartition  service to get partitions
 * @param directSqlSavePartition service to save partitions
 * @param icebergTableHandler    iceberg table handler
 * @return HiveConnectorPartitionService
 */
@Bean
public HiveConnectorPartitionService partitionService(
    final IMetacatHiveClient metacatHiveClient,
    final Warehouse warehouse,
    final HiveConnectorInfoConverter hiveMetacatConverter,
    final ConnectorContext connectorContext,
    final DirectSqlGetPartition directSqlGetPartition,
    final DirectSqlSavePartition directSqlSavePartition,
    final IcebergTableHandler icebergTableHandler
) {
    // Note the constructor takes the context first, unlike this factory method.
    final HiveConnectorPartitionService fastPartitionService = new HiveConnectorFastPartitionService(
        connectorContext,
        metacatHiveClient,
        warehouse,
        hiveMetacatConverter,
        directSqlGetPartition,
        directSqlSavePartition,
        icebergTableHandler
    );
    return fastPartitionService;
}
Example #4
Source File: PartitionUtil.java From metacat with Apache License 2.0 | 6 votes |
/**
 * Extracts the partition values encoded in a partition name, ordered by the
 * table's partition keys. Every partition key of the table must be present in
 * the name.
 *
 * @param tableQName table name
 * @param table      table
 * @param partName   partition name
 * @return list of partition values, one per partition key of the table
 * @throws InvalidMetaException if the name is null/empty or lacks a value for a partition key
 */
public static List<String> getPartValuesFromPartName(final QualifiedName tableQName,
                                                     final Table table,
                                                     final String partName) {
    if (Strings.isNullOrEmpty(partName)) {
        throw new InvalidMetaException(tableQName, partName, null);
    }

    // Parse the name into a key -> value spec.
    final LinkedHashMap<String, String> parsedSpec = new LinkedHashMap<>();
    Warehouse.makeSpecFromName(parsedSpec, new Path(partName));

    // Emit values in the table's key order, rejecting names that miss a key.
    final List<String> orderedValues = new ArrayList<>();
    for (FieldSchema partitionKey : table.getPartitionKeys()) {
        final String value = parsedSpec.get(partitionKey.getName());
        if (value == null) {
            throw new InvalidMetaException(tableQName, partName, null);
        }
        orderedValues.add(value);
    }
    return orderedValues;
}
Example #5
Source File: AWSCatalogMetastoreClient.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
public AWSCatalogMetastoreClient(HiveConf conf, HiveMetaHookLoader hook) throws MetaException { this.conf = conf; glueClient = new AWSGlueClientFactory(this.conf).newClient(); // TODO preserve existing functionality for HiveMetaHook wh = new Warehouse(this.conf); AWSGlueMetastore glueMetastore = new AWSGlueMetastoreFactory().newMetastore(conf); glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, glueMetastore, wh); snapshotActiveConf(); catalogId = MetastoreClientUtils.getCatalogId(conf); if (!doesDefaultDBExist()) { createDefaultDatabase(); } }
Example #6
Source File: GlueMetastoreClientDelegate.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * Taken from HiveMetaStore#append_partition_common.
 *
 * Builds a partition object for the given table and values: the storage
 * descriptor is a deep copy of the table's, relocated under the table location
 * using the partition name; create time and DDL time are set to "now" in seconds.
 *
 * @param table  table the partition belongs to
 * @param values partition values
 * @return the newly built partition
 * @throws MetaException if the partition name cannot be built from the keys and values
 */
private org.apache.hadoop.hive.metastore.api.Partition buildPartitionFromValues(
    org.apache.hadoop.hive.metastore.api.Table table, List<String> values) throws MetaException {
    org.apache.hadoop.hive.metastore.api.Partition built =
        new org.apache.hadoop.hive.metastore.api.Partition();
    built.setDbName(table.getDbName());
    built.setTableName(table.getTableName());
    built.setValues(values);
    built.setSd(table.getSd().deepCopy());

    // Partition directory = <table location>/<partition name>.
    String tableLocation = table.getSd().getLocation();
    String partitionName = Warehouse.makePartName(table.getPartitionKeys(), values);
    Path partitionLocation = new Path(tableLocation, partitionName);
    built.getSd().setLocation(partitionLocation.toString());

    long nowInSeconds = System.currentTimeMillis() / MILLISECOND_TO_SECOND_FACTOR;
    built.setCreateTime((int) nowInSeconds);
    built.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(nowInSeconds));
    return built;
}
Example #7
Source File: GlueMetastoreClientDelegate.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * Lists the names of a table's partitions, optionally restricted by a partial
 * specification of partition values.
 *
 * @param databaseName database containing the table
 * @param tableName    table whose partitions are listed
 * @param values       partial partition values used to build a filter expression,
 *                     or {@code null} for no filter
 * @param max          limit passed through to the partition lookup
 * @return partition names built from the table's partition keys and each partition's values
 */
public List<String> listPartitionNames(
    String databaseName,
    String tableName,
    List<String> values,
    short max
) throws TException {
    org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName);

    // A null value list means "no filter".
    String filterExpression = (values == null)
        ? null
        : ExpressionHelper.buildExpressionFromPartialSpecification(table, values);

    List<org.apache.hadoop.hive.metastore.api.Partition> matched =
        getPartitions(databaseName, tableName, filterExpression, max);

    List<String> partitionNames = Lists.newArrayList();
    for (org.apache.hadoop.hive.metastore.api.Partition partition : matched) {
        String name = Warehouse.makePartName(table.getPartitionKeys(), partition.getValues());
        partitionNames.add(name);
    }
    return partitionNames;
}
Example #8
Source File: GlueMetastoreClientDelegateTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
@Before public void setup() throws Exception { conf = new HiveConf(); glueClient = mock(AWSGlue.class); wh = mock(Warehouse.class); metastoreClientDelegate = new GlueMetastoreClientDelegate(conf, new DefaultAWSGlueMetastore(conf, glueClient), wh); // Create a client delegate with CatalogId hiveConfCatalogId = new HiveConf(); hiveConfCatalogId.set(GlueMetastoreClientDelegate.CATALOG_ID_CONF, CATALOG_ID); metastoreClientDelegateCatalogId = new GlueMetastoreClientDelegate(hiveConfCatalogId, new DefaultAWSGlueMetastore(hiveConfCatalogId, glueClient), wh); testDb = getTestDatabase(); testTbl= getTestTable(testDb.getName()); setupMockWarehouseForPath(new Path(testTbl.getStorageDescriptor().getLocation().toString()), false, true); }
Example #9
Source File: HiveConnectorFastPartitionService.java From metacat with Apache License 2.0 | 6 votes |
/**
 * Constructor. Passes the context, client and converter to the superclass and
 * stores the remaining collaborators, including the registry read from the context.
 *
 * @param context                connector context
 * @param metacatHiveClient      hive client
 * @param warehouse              hive warehouse
 * @param hiveMetacatConverters  hive converter
 * @param directSqlGetPartition  service to get partitions
 * @param directSqlSavePartition service to save partitions
 * @param icebergTableHandler    iceberg table handler
 */
public HiveConnectorFastPartitionService(
    final ConnectorContext context,
    final IMetacatHiveClient metacatHiveClient,
    final Warehouse warehouse,
    final HiveConnectorInfoConverter hiveMetacatConverters,
    final DirectSqlGetPartition directSqlGetPartition,
    final DirectSqlSavePartition directSqlSavePartition,
    final IcebergTableHandler icebergTableHandler
) {
    super(context, metacatHiveClient, hiveMetacatConverters);

    // Value taken from the context.
    this.registry = context.getRegistry();

    // Collaborators supplied directly by the caller.
    this.warehouse = warehouse;
    this.icebergTableHandler = icebergTableHandler;
    this.directSqlGetPartition = directSqlGetPartition;
    this.directSqlSavePartition = directSqlSavePartition;
}
Example #10
Source File: MetastoreClientPartitionIntegrationTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * One-time fixture: builds a metastore client around a Glue test client with a
 * mocked configuration and warehouse, then creates the test database in Glue
 * and prepares the test table.
 */
@BeforeClass
public static void setUpForClass() throws MetaException {
    HiveConf mockedConf = mock(HiveConf.class);
    Warehouse mockedWarehouse = mock(Warehouse.class);
    when(mockedConf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname, "")).thenReturn("");

    // The mocked factory hands out the Glue test client.
    glueClient = new GlueTestClientFactory().newClient();
    GlueClientFactory mockedFactory = mock(GlueClientFactory.class);
    when(mockedFactory.newClient()).thenReturn(glueClient);

    metastoreClient = new AWSCatalogMetastoreClient.Builder()
        .withHiveConf(mockedConf)
        .withWarehouse(mockedWarehouse)
        .withClientFactory(mockedFactory)
        .build();

    catalogDatabase = getTestDatabase();
    CreateDatabaseRequest createRequest = new CreateDatabaseRequest()
        .withDatabaseInput(GlueInputConverter.convertToDatabaseInput(catalogDatabase));
    glueClient.createDatabase(createRequest);

    catalogTable = getTestTable();
}
Example #11
Source File: HiveConnectorClientConfig.java From metacat with Apache License 2.0 | 6 votes |
/**
 * Creates the warehouse used for file system calls. The default configuration
 * is overlaid with every entry of the connector configuration before the
 * warehouse is built.
 *
 * @param connectorContext connector config context
 * @return WareHouse
 * @throws IllegalArgumentException wrapping any failure raised while building the warehouse
 */
@Bean
public Warehouse warehouse(final ConnectorContext connectorContext) {
    try {
        final HiveConf hiveConf = this.getDefaultConf(connectorContext);
        // Connector-specific settings are applied on top of the defaults.
        connectorContext.getConfiguration().forEach((key, value) -> hiveConf.set(key, value));
        return new Warehouse(hiveConf);
    } catch (Exception e) {
        final String message = String.format(
            "Failed creating the hive warehouse for catalog: %s",
            connectorContext.getCatalogName()
        );
        throw new IllegalArgumentException(message, e);
    }
}
Example #12
Source File: MetastoreClientDatabaseIntegrationTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * Per-test fixture: stubs the warehouse so every path resolves to {@code /db}
 * and is reported as a directory, builds the metastore client around the Glue
 * test client, and prepares the catalog/Hive test databases and the cleanup list.
 */
@Before
public void setup() throws MetaException {
    conf = mock(HiveConf.class);
    wh = mock(Warehouse.class);
    tmpPath = new Path("/db");

    // Warehouse stubbing.
    when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
    when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
    when(wh.isDir(any(Path.class))).thenReturn(true);

    // Configuration stubbing.
    when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname, "")).thenReturn("");

    glueClient = new GlueTestClientFactory().newClient();
    GlueClientFactory mockedFactory = mock(GlueClientFactory.class);
    when(mockedFactory.newClient()).thenReturn(glueClient);

    metastoreClient = new AWSCatalogMetastoreClient.Builder()
        .withHiveConf(conf)
        .withWarehouse(wh)
        .withClientFactory(mockedFactory)
        .build();

    catalogDB = getTestDatabase();
    hiveDB = CatalogToHiveConverter.convertDatabase(catalogDB);
    additionalDbForCleanup = Lists.newArrayList();
}
Example #13
Source File: AWSCatalogMetastoreClientTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * Appending a partition by name ("key=foo") must yield a partition whose
 * values are ["foo"], and must leave only daemon thread pools behind.
 */
@Test
public void testAppendPartitionByName() throws Exception {
    List<String> values = Arrays.asList("foo");

    // Glue returns the test table for any lookup.
    GetTableResult tableResult = new GetTableResult()
        .withTable(HiveToCatalogConverter.convertTable(testTable));
    when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(tableResult);

    // Warehouse stubbing for the location the new partition resolves to.
    String expectedPartitionName = Warehouse.makePartName(testTable.getPartitionKeys(), values);
    Path partLocation = new Path(testTable.getSd().getLocation(), expectedPartitionName);
    setupMockWarehouseForPath(partLocation, false, true);
    mockBatchCreatePartitionsSucceed();

    org.apache.hadoop.hive.metastore.api.Partition appended = metastoreClient.appendPartition(
        testDB.getName(),
        testTable.getTableName(),
        testTable.getPartitionKeys().get(0).getName() + "=foo");

    assertThat(appended.getValues(), is(values));
    assertDaemonThreadPools();
}
Example #14
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0 | 6 votes |
private void loadPartition(Path srcDir, Table table, Map<String, String> partSpec, HiveMetastoreClientWrapper client) throws TException, IOException { Path tblLocation = new Path(table.getSd().getLocation()); String dbName = tablePath.getDatabaseName(); String tableName = tablePath.getObjectName(); List<Partition> existingPart = client.listPartitions(dbName, tableName, new ArrayList<>(partSpec.values()), (short) 1); Path destDir = existingPart.isEmpty() ? new Path(tblLocation, Warehouse.makePartPath(partSpec)) : new Path(existingPart.get(0).getSd().getLocation()); moveFiles(srcDir, destDir); // register new partition if it doesn't exist if (existingPart.isEmpty()) { StorageDescriptor sd = new StorageDescriptor(hiveTablePartition.getStorageDescriptor()); sd.setLocation(destDir.toString()); Partition partition = HiveTableUtil.createHivePartition(dbName, tableName, new ArrayList<>(partSpec.values()), sd, new HashMap<>()); partition.setValues(new ArrayList<>(partSpec.values())); client.add_partition(partition); } }
Example #15
Source File: MockThriftMetastoreClient.java From presto with Apache License 2.0 | 6 votes |
/**
 * Mock lookup: counts the access, optionally fails, and otherwise answers only
 * for the test database/table and the two known test partition names.
 *
 * @param dbName    must equal {@code TEST_DATABASE}
 * @param tableName must equal {@code TEST_TABLE}
 * @param names     partition names; all must be among the known test partitions
 * @return a transformed view of {@code names} producing one partition per name
 * @throws NoSuchObjectException when the database, table or any name is unknown
 */
@Override
public List<Partition> getPartitionsByNames(String dbName, String tableName, List<String> names)
        throws TException {
    accessCount.incrementAndGet();
    if (throwException) {
        throw new RuntimeException();
    }

    boolean knownRequest = dbName.equals(TEST_DATABASE)
        && tableName.equals(TEST_TABLE)
        && ImmutableSet.of(TEST_PARTITION1, TEST_PARTITION2).containsAll(names);
    if (!knownRequest) {
        throw new NoSuchObjectException();
    }

    // The lambda cannot throw the checked MetaException, so it is rethrown unchecked.
    return Lists.transform(names, partitionName -> {
        try {
            return new Partition(
                ImmutableList.copyOf(Warehouse.getPartValuesFromPartName(partitionName)),
                TEST_DATABASE,
                TEST_TABLE,
                0,
                0,
                DEFAULT_STORAGE_DESCRIPTOR,
                ImmutableMap.of());
        } catch (MetaException e) {
            throw new RuntimeException(e);
        }
    });
}
Example #16
Source File: BufferedPartitionFetcher.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Loads the next window of partitions into the buffer, keyed by partition name.
 * The window starts at {@code firstPartition} and spans at most
 * {@code bufferSize} names, clamped to the end of {@code partitionNames}.
 *
 * @param firstPartition index into {@code partitionNames} of the first partition to load
 * @throws RuntimeException wrapping the {@code TException} raised when the fetch fails
 */
@VisibleForTesting
void bufferPartitions(int firstPartition) {
    // Exclusive end index of the window, clamped to the number of known names.
    int endIndexExclusive = Math.min(partitionNames.size(), firstPartition + bufferSize);
    List<String> partitionsToLoad = partitionNames.subList(firstPartition, endIndexExclusive);
    try {
        // Log the number of partitions actually requested. The end index (what was
        // logged before) only equals that count when firstPartition is 0.
        LOG.debug("Fetching {} partitions.", partitionsToLoad.size());
        List<Partition> partitions = metastore
            .getPartitionsByNames(table.getDbName(), table.getTableName(), partitionsToLoad);
        LOG.debug("Fetched {} partitions for table {}.", partitions.size(), Warehouse.getQualifiedName(table));

        buffer = new HashMap<>(partitions.size());
        for (Partition partition : partitions) {
            buffer.put(Warehouse.makePartName(table.getPartitionKeys(), partition.getValues()), partition);
        }
    } catch (TException e) {
        throw new RuntimeException("Unable to fetch partitions of table " + Warehouse.getQualifiedName(table), e);
    }
}
Example #17
Source File: PartitionedTableMetadataUpdateReplication.java From circus-train with Apache License 2.0 | 6 votes |
/**
 * Keeps only the source partitions that already exist in the replica table,
 * together with their statistics, preserving the source iteration order.
 *
 * @param replicaClient                 client used to probe the replica for each partition
 * @param sourcePartitionsAndStatistics source partitions and their statistics
 * @param partitionKeys                 partition keys passed through to the result
 * @return partitions (with statistics) that were found in the replica
 */
private PartitionsAndStatistics filterOnReplicatedPartitions(
    CloseableMetaStoreClient replicaClient,
    PartitionsAndStatistics sourcePartitionsAndStatistics,
    List<FieldSchema> partitionKeys)
    throws TException {
    Map<Partition, ColumnStatistics> replicated = new LinkedHashMap<>();
    for (Partition sourcePartition : sourcePartitionsAndStatistics.getPartitions()) {
        try {
            // Existence probe: the lookup result itself is not needed.
            replicaClient.getPartition(replicaDatabaseName, replicaTableName, sourcePartition.getValues());
            ColumnStatistics statistics =
                sourcePartitionsAndStatistics.getStatisticsForPartition(sourcePartition);
            replicated.put(sourcePartition, statistics);
        } catch (NoSuchObjectException e) {
            LOG.debug("Partition {} doesn't exist, skipping it...", Warehouse.getQualifiedName(sourcePartition));
        }
    }
    return new PartitionsAndStatistics(partitionKeys, replicated);
}
Example #18
Source File: AWSCatalogMetastoreClient.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
@Override public org.apache.hadoop.hive.metastore.api.Partition getPartitionWithAuthInfo( String databaseName, String tableName, List<String> values, String userName, List<String> groupNames) throws MetaException, UnknownTableException, NoSuchObjectException, TException { // TODO move this into the service org.apache.hadoop.hive.metastore.api.Partition partition = getPartition(databaseName, tableName, values); org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName); if ("TRUE".equalsIgnoreCase(table.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { String partName = Warehouse.makePartName(table.getPartitionKeys(), values); HiveObjectRef obj = new HiveObjectRef(); obj.setObjectType(HiveObjectType.PARTITION); obj.setDbName(databaseName); obj.setObjectName(tableName); obj.setPartValues(values); org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privilegeSet = this.get_privilege_set(obj, userName, groupNames); partition.setPrivileges(privilegeSet); } return partition; }
Example #19
Source File: AWSCatalogMetastoreClient.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
public AWSCatalogMetastoreClient(HiveConf conf, HiveMetaHookLoader hook) throws MetaException { this.conf = conf; glueClient = new AWSGlueClientFactory(this.conf).newClient(); // TODO preserve existing functionality for HiveMetaHook wh = new Warehouse(this.conf); AWSGlueMetastore glueMetastore = new AWSGlueMetastoreFactory().newMetastore(conf); glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, glueMetastore, wh); snapshotActiveConf(); catalogId = MetastoreClientUtils.getCatalogId(conf); if (!doesDefaultDBExist()) { createDefaultDatabase(); } }
Example #20
Source File: DiffGeneratedPartitionPredicateTest.java From circus-train with Apache License 2.0 | 6 votes |
private void setupHiveTables() throws TException, IOException { List<FieldSchema> partitionKeys = Lists.newArrayList(newFieldSchema("p1"), newFieldSchema("p2")); File tableLocation = new File("db1", "table1"); StorageDescriptor sd = newStorageDescriptor(tableLocation, "col0"); table1 = newTable("table1", "db1", partitionKeys, sd); Partition partition1 = newPartition(table1, "value1", "value2"); Partition partition2 = newPartition(table1, "value11", "value22"); table1Partitions = Arrays.asList(partition1, partition2); // table1PartitionNames = Arrays .asList(Warehouse.makePartName(partitionKeys, partition1.getValues()), Warehouse.makePartName(partitionKeys, partition2.getValues())); File tableLocation2 = new File("db2", "table2"); StorageDescriptor sd2 = newStorageDescriptor(tableLocation2, "col0"); table2 = newTable("table2", "db2", partitionKeys, sd2); }
Example #21
Source File: AWSCatalogMetastoreClientTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * Per-test fixture: converts the catalog test objects to their Hive form,
 * stubs the warehouse for the default and partition paths, and builds the
 * metastore client from mocked/spied factories without creating defaults.
 */
@Before
public void setUp() throws Exception {
    // Hive-side test objects.
    testDB = CatalogToHiveConverter.convertDatabase(getTestDatabase());
    testTable = CatalogToHiveConverter.convertTable(getTestTable(), testDB.getName());
    testIndex = getTestHiveIndex(testDB.getName());
    testPartition = CatalogToHiveConverter.convertPartition(
        getTestPartition(testDB.getName(), testTable.getTableName(), Lists.newArrayList("val1")));
    testFunction = CatalogToHiveConverter.convertFunction(testDB.getName(), getCatalogTestFunction());

    // Warehouse stubbing.
    defaultWhPath = new Path("/tmp");
    partitionPath = new Path(testPartition.getSd().getLocation());
    wh = mock(Warehouse.class);
    setupMockWarehouseForPath(defaultWhPath, true, true);
    setupMockWarehouseForPath(partitionPath, false, false);

    // Configuration and Glue collaborators.
    conf = spy(new HiveConf());
    conf.setInt(GlueMetastoreClientDelegate.NUM_PARTITION_SEGMENTS_CONF, 1);
    glueClient = spy(AWSGlue.class);
    clientFactory = mock(GlueClientFactory.class);
    metastoreFactory = mock(AWSGlueMetastoreFactory.class);
    when(clientFactory.newClient()).thenReturn(glueClient);
    when(metastoreFactory.newMetastore(conf)).thenReturn(new DefaultAWSGlueMetastore(conf, glueClient));

    metastoreClient = new AWSCatalogMetastoreClient.Builder()
        .withClientFactory(clientFactory)
        .withMetastoreFactory(metastoreFactory)
        .withWarehouse(wh)
        .createDefaults(false)
        .withHiveConf(conf)
        .build();
}
Example #22
Source File: MetastoreClientTableIntegrationTest.java From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0 | 6 votes |
/**
 * One-time fixture: stubs the warehouse so every path resolves to {@code /db}
 * and is reported as a directory, builds the metastore client around the Glue
 * test client, and creates the test database in Glue.
 */
@BeforeClass
public static void setup() throws MetaException {
    conf = mock(HiveConf.class);
    wh = mock(Warehouse.class);
    tmpPath = new Path("/db");

    // Warehouse stubbing.
    when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
    when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
    when(wh.isDir(any(Path.class))).thenReturn(true);

    // Configuration stubbing.
    when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname, "")).thenReturn("");

    glueClient = new GlueTestClientFactory().newClient();
    GlueClientFactory mockedFactory = mock(GlueClientFactory.class);
    when(mockedFactory.newClient()).thenReturn(glueClient);

    metastoreClient = new AWSCatalogMetastoreClient.Builder()
        .withHiveConf(conf)
        .withWarehouse(wh)
        .withClientFactory(mockedFactory)
        .build();

    catalogDB = getTestDatabase();
    hiveDB = CatalogToHiveConverter.convertDatabase(catalogDB);

    CreateDatabaseRequest createRequest = new CreateDatabaseRequest()
        .withDatabaseInput(GlueInputConverter.convertToDatabaseInput(catalogDB));
    glueClient.createDatabase(createRequest);
}
Example #23
Source File: HiveConvertersImpl.java From metacat with Apache License 2.0 | 5 votes |
/** * {@inheritDoc} */ @Override public List<String> getPartValsFromName(@Nullable final TableDto tableDto, final String partName) { // Unescape the partition name final LinkedHashMap<String, String> hm; try { hm = Warehouse.makeSpecFromName(partName); } catch (MetaException e) { throw new IllegalArgumentException("Invalid partition name", e); } // Get the partition keys. List<String> partitionKeys = null; if (tableDto != null) { partitionKeys = tableDto.getPartition_keys(); } // If table has not been provided, return the values without validating. if (partitionKeys != null) { final List<String> partVals = Lists.newArrayListWithCapacity(partitionKeys.size()); for (String key : partitionKeys) { final String val = hm.get(key); if (val == null) { throw new IllegalArgumentException("Invalid partition name - missing " + key); } partVals.add(val); } return partVals; } else { return Lists.newArrayList(hm.values()); } }
Example #24
Source File: PartitionsAndStatistics.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Builds the partition name for a partition, converting the checked
 * {@code MetaException} into an unchecked exception.
 *
 * @param partitionKeys partition keys of the owning table
 * @param partition     partition whose values are used
 * @return the partition name
 * @throws RuntimeException wrapping the {@code MetaException} raised by the name builder
 */
private static String getPartitionName(List<FieldSchema> partitionKeys, Partition partition) {
    final String partitionName;
    try {
        partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
    } catch (MetaException e) {
        throw new RuntimeException(e);
    }
    return partitionName;
}
Example #25
Source File: HiveEndpoint.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Builds the partition name of every given partition, in input order.
 *
 * @param partitionKeys partition keys of the owning table
 * @param partitions    partitions to name
 * @return one name per partition
 * @throws MetaException if a name cannot be built from the keys and a partition's values
 */
private List<String> getPartitionNames(List<FieldSchema> partitionKeys, List<Partition> partitions)
    throws MetaException {
    List<String> names = new ArrayList<>(partitions.size());
    for (Partition current : partitions) {
        String name = Warehouse.makePartName(partitionKeys, current.getValues());
        names.add(name);
    }
    return names;
}
Example #26
Source File: OrcMapreduceRecordReader.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Opens the ORC file behind the split and creates the record reader for it.
 *
 * <p>Reads the merge-distance, read-size and stream-buffer settings from the
 * task configuration, derives the partition values from the split's path, and
 * wires an ORC data source and reader over the opened input stream.
 *
 * <p>If any step after opening the stream fails, the stream is closed before
 * the failure propagates, so a failed initialization does not leak the open
 * file handle.
 *
 * @param inputSplit         the split to read; must be an {@code OrcNewSplit}
 * @param taskAttemptContext supplies the job configuration
 * @throws IOException if the file cannot be opened, the partition values cannot
 *                     be parsed from the path, or the reader cannot be created
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);

    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);

    // From here on the stream is open: close it ourselves if initialization fails.
    boolean initialized = false;
    try {
        rowStruct = getRowStruct(configuration);
        predicate = getSplicePredicate(configuration);
        List<Integer> partitions = getPartitionIds(configuration);
        List<Integer> columnIds = getColumnIds(configuration);

        // Partition values are encoded in the file path.
        List<String> values;
        try {
            values = Warehouse.getPartValuesFromPartName(path.toString());
        } catch (MetaException me) {
            throw new IOException(me);
        }

        OrcDataSource orcDataSource = new HdfsOrcDataSource(
            path.toString(),
            size,
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
            new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE),
            inputStream);
        OrcReader orcReader = new OrcReader(
            orcDataSource,
            new OrcMetadataReader(),
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
        orcRecordReader = orcReader.createRecordReader(
            getColumnsAndTypes(columnIds, rowStruct),
            predicate,
            orcNewSplit.getStart(),
            orcNewSplit.getLength(),
            HIVE_STORAGE_TIME_ZONE,
            new AggregatedMemoryContext(),
            partitions,
            values);
        initialized = true;
    } finally {
        if (!initialized) {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // The failure that brought us here is already propagating and is
                // the more useful one to report; a close failure must not mask it.
            }
        }
    }
}
Example #27
Source File: BufferedPartitionFetcher.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Creates a fetcher for the given table: eagerly loads all partition names
 * from the metastore and starts with an empty partition buffer.
 *
 * @param metastore  client used to list partition names
 * @param table      table whose partitions are fetched
 * @param bufferSize stored as this fetcher's buffer size
 * @throws RuntimeException wrapping the {@code TException} raised when listing names fails
 */
public BufferedPartitionFetcher(IMetaStoreClient metastore, Table table, short bufferSize) {
    this.metastore = metastore;
    this.table = table;
    this.bufferSize = bufferSize;
    buffer = Collections.emptyMap();

    try {
        LOG.debug("Fetching all partition names.");
        partitionNames = metastore.listPartitionNames(table.getDbName(), table.getTableName(), NO_LIMIT);
        LOG.debug("Fetched {} partition names for table {}.",
            partitionNames.size(), Warehouse.getQualifiedName(table));
    } catch (TException e) {
        throw new RuntimeException(
            "Unable to fetch partition names of table " + Warehouse.getQualifiedName(table), e);
    }
}
Example #28
Source File: FilterGeneratorImpl.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Prints the source details and the partition expression, fetches the
 * partitions matching the evaluated filter (up to the configured limit) and
 * prints the filter, the limit and the fetched partition values in sorted order.
 *
 * @throws CircusTrainException if the partitions cannot be fetched
 */
@Override
public void run() throws CircusTrainException {
    out.println(String.format("Source catalog:        %s", source.getName()));
    out.println(String.format("Source MetaStore URIs: %s", source.getMetaStoreUris()));
    out.println(String.format("Source table:          %s", Warehouse.getQualifiedName(sourceTable)));
    out.println(String.format("Partition expression:  %s", partitionFilter));

    String parsedPartitionFilter = partitionPredicate.getPartitionPredicate();
    boolean filterUnchanged = Objects.equals(partitionFilter, parsedPartitionFilter);
    if (!filterUnchanged) {
        LOG.info("Evaluated expression to: {}", parsedPartitionFilter);
    }

    try {
        LOG.info("Executing filter with limit {} on: {}:{} ({})", partitionLimit, source.getName(),
            Warehouse.getQualifiedName(sourceTable), source.getMetaStoreUris());
        PartitionsAndStatistics fetched =
            source.getPartitions(sourceTable, parsedPartitionFilter, partitionLimit);
        LOG.info("Retrieved {} partition(s):", fetched.getPartitions().size());

        // Order the partitions with PARTITION_COMPARATOR before reporting them.
        SortedSet<Partition> ordered = new TreeSet<>(PARTITION_COMPARATOR);
        ordered.addAll(fetched.getPartitions());

        List<List<String>> fetchedValues = new ArrayList<>();
        for (Partition current : ordered) {
            fetchedValues.add(current.getValues());
            LOG.info("{}", current.getValues());
        }

        out.println(String.format("Partition filter:      %s", parsedPartitionFilter));
        out.println(String.format("Partition limit:       %s", partitionLimit));
        out.println(String.format("Partition(s) fetched:  %s", fetchedValues));
    } catch (TException e) {
        throw new CircusTrainException(
            "Could not fetch partitions for filter: '" + parsedPartitionFilter + "'.", e);
    }
}
Example #29
Source File: CatalogThriftHiveMetastore.java From metacat with Apache License 2.0 | 5 votes |
/**
 * {@inheritDoc}
 *
 * A null or empty partition name yields an empty map; otherwise the name is
 * parsed into a partition spec.
 */
@Override
public Map<String, String> partition_name_to_spec(final String partName) throws TException {
    return requestWrapper("partition_name_to_spec", new Object[]{partName}, () -> {
        if (Strings.isNullOrEmpty(partName)) {
            // Typed factory instead of the raw EMPTY_MAP constant: same immutable
            // empty map, without an unchecked cast.
            return Collections.<String, String>emptyMap();
        }
        return Warehouse.makeSpecFromName(partName);
    });
}
Example #30
Source File: CatalogThriftHiveMetastore.java From metacat with Apache License 2.0 | 5 votes |
/**
 * {@inheritDoc}
 *
 * A null or empty partition name yields an empty list; otherwise the name is
 * parsed and the values of the resulting spec are returned in a new list.
 */
@Override
public List<String> partition_name_to_vals(final String partName) throws TException {
    return requestWrapper("partition_name_to_vals", new Object[]{partName}, () -> {
        if (Strings.isNullOrEmpty(partName)) {
            // Typed factory instead of the raw EMPTY_LIST constant: same immutable
            // empty list, without an unchecked cast.
            return Collections.<String>emptyList();
        }
        final Map<String, String> spec = Warehouse.makeSpecFromName(partName);
        // Copy the spec values into a new mutable list.
        final List<String> vals = Lists.newArrayList(spec.values());
        return vals;
    });
}