org.apache.hadoop.hive.ql.plan.TableDesc Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.plan.TableDesc.
Each example notes its originating project, source file, and license.
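Most of the handlers below follow the same basic pattern: pull the table-level Properties off the TableDesc and copy the keys the MapReduce job will need into the per-job property map that Hive passes to configureInputJobProperties / configureOutputJobProperties. The sketch below distills that pattern; it is only an illustration, and the "my.handler." property prefix and the class name are hypothetical, not taken from any of the projects quoted here.

import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.hive.ql.plan.TableDesc;

public class TableDescPropertyCopier {

  // Copy every table property in a (hypothetical) handler namespace, plus the
  // Hive table name, from the TableDesc into the job property map.
  public static void copyHandlerProperties(TableDesc tableDesc,
                                           Map<String, String> jobProperties) {
    Properties tblProps = tableDesc.getProperties();
    for (String key : tblProps.stringPropertyNames()) {
      if (key.startsWith("my.handler.")) {
        jobProperties.put(key, tblProps.getProperty(key));
      }
    }
    // the Hive table name is always available directly on the descriptor
    jobProperties.put("my.handler.hive.table.name", tableDesc.getTableName());
  }
}

A storage handler would typically call a helper like this from both configureInputJobProperties and configureOutputJobProperties, much as the JDBC, Kudu, and Solr handlers below do with their own equivalents.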
Example #1
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 6 votes |
private void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("tabelDesc: " + tableDesc);
    LOG.debug("jobProperties: " + jobProperties);
  }

  String tblName = tableDesc.getTableName();
  Properties tblProps = tableDesc.getProperties();
  String columnNames = tblProps.getProperty(Constants.LIST_COLUMNS);
  jobProperties.put(DBConfiguration.INPUT_CLASS_PROPERTY, DbRecordWritable.class.getName());
  jobProperties.put(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tblName);
  jobProperties.put(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tblName);
  jobProperties.put(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, columnNames);
  jobProperties.put(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, columnNames);

  for (String key : tblProps.stringPropertyNames()) {
    if (key.startsWith("mapred.jdbc.")) {
      String value = tblProps.getProperty(key);
      jobProperties.put(key, value);
    }
  }
}
Example #2
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
private void init(TableDesc tableDesc, boolean read) {
  Configuration cfg = getConf();
  // NB: we can't just merge the table properties in, we need to save them
  // per input/output otherwise clashes occur which confuse Hive
  Settings settings = HadoopSettingsManager.loadFrom(cfg);
  //settings.setProperty((read ? HiveConstants.INPUT_TBL_PROPERTIES : HiveConstants.OUTPUT_TBL_PROPERTIES), IOUtils.propsToString(tableDesc.getProperties()));
  if (read) {
    // no generic setting
  } else {
    // replace the default committer when using the old API
    HadoopCfgUtils.setOutputCommitterClass(cfg, EsOutputFormat.EsOutputCommitter.class.getName());
  }

  Assert.hasText(tableDesc.getProperties().getProperty(TABLE_LOCATION), String.format(
      "no table location [%s] declared by Hive resulting in abnormal execution;", TABLE_LOCATION));
}
Example #3
Source File: SolrStorageHandler.java From hive-solr with MIT License | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tbl, Map<String, String> jobProperties) {
  final Properties properties = tbl.getProperties();
  // copy the table properties into the runtime JobConf
  Conf.copyProperties(properties, jobProperties);
}
Example #4
Source File: TestAzureTableHiveStorageHandler.java From azure-tables-hadoop with Apache License 2.0 | 5 votes |
@Test
public void testConfigureInputProperties() {
  AzureTableHiveStorageHandler handler = new AzureTableHiveStorageHandler();
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  tableDesc.getProperties().put(Keys.TABLE_NAME.getKey(), "t");
  tableDesc.getProperties().put(Keys.ACCOUNT_URI.getKey(), "http://fakeUri");
  tableDesc.getProperties().put(Keys.STORAGE_KEY.getKey(), "fakeKey");
  Map<String, String> jobProperties = new HashMap<String, String>();
  handler.configureInputJobProperties(tableDesc, jobProperties);
  assertEquals("t", jobProperties.get(Keys.TABLE_NAME.getKey()));
  assertEquals("http://fakeUri", jobProperties.get(Keys.ACCOUNT_URI.getKey()));
  assertNull(jobProperties.get(Keys.PARTITIONER_CLASS.getKey()));
}
Example #5
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  try {
    configureTableJobProperties(tableDesc, jobProperties, false);
  } catch (Exception e) {
    Log.error(e);
    System.exit(1);
  }
}
Example #6
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  try {
    configureTableJobProperties(tableDesc, jobProperties, true);
  } catch (Exception e) {
    Log.error(e);
    System.exit(1);
  }
}
Example #7
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
public void configureTableJobProperties(TableDesc tableDesc,
    Map<String, String> jobProperties, boolean isInputJob) throws Exception {
  Properties tableProperties = tableDesc.getProperties();
  String tableName = null;
  String connStr = tableProperties.getProperty(MRConstants.SPLICE_JDBC_STR);
  if (connStr == null)
    throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_JDBC_STR + "'?");
  // TODO JL
  if (sqlUtil == null)
    sqlUtil = SMSQLUtil.getInstance(connStr);
  if (isInputJob) {
    tableName = tableProperties.getProperty(MRConstants.SPLICE_TABLE_NAME);
    if (tableName == null)
      throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_TABLE_NAME + "'?");
  } else {
    tableName = tableProperties.getProperty(MRConstants.SPLICE_TABLE_NAME);
    if (tableName == null)
      throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_TABLE_NAME + "'?");
  }

  tableName = tableName.trim();

  if (parentConn == null) {
    parentTxnId = startWriteJobParentTxn(connStr, tableName);
  }
  jobProperties.put(MRConstants.SPLICE_TRANSACTION_ID, parentTxnId);
  jobProperties.put(MRConstants.SPLICE_TABLE_NAME, tableName);
  jobProperties.put(MRConstants.SPLICE_JDBC_STR, connStr);
}
Example #8
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  if (log.isDebugEnabled()) {
    log.debug("Configuring job credentials for Elasticsearch");
  }
  Settings settings = new CompositeSettings(Arrays.asList(
      HadoopSettingsManager.loadFrom(tableDesc.getProperties()),
      HadoopSettingsManager.loadFrom(jobConf)
  ));
  InitializationUtils.setUserProviderIfNotSet(settings, HadoopUserProvider.class, log);
  UserProvider userProvider = UserProvider.create(settings);
  if (userProvider.isEsKerberosEnabled()) {
    User user = userProvider.getUser();
    ClusterInfo clusterInfo = settings.getClusterInfoOrNull();
    RestClient bootstrap = new RestClient(settings);
    try {
      // first get ES main action info if it's missing
      if (clusterInfo == null) {
        clusterInfo = bootstrap.mainInfo();
      }
      // Add the token to the job
      TokenUtil.addTokenForJobConf(bootstrap, clusterInfo.getClusterName(), user, jobConf);
    } catch (EsHadoopException ex) {
      throw new EsHadoopIllegalArgumentException(String.format("Cannot detect ES version - "
          + "typically this happens if the network/Elasticsearch cluster is not accessible or when targeting "
          + "a WAN/Cloud instance without the proper setting '%s'", ConfigurationOptions.ES_NODES_WAN_ONLY), ex);
    } finally {
      bootstrap.close();
    }
  } else {
    if (log.isDebugEnabled()) {
      log.debug("Ignoring Elasticsearch credentials since Kerberos Auth is not enabled.");
    }
  }
}
Example #9
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> properties) {
  Properties props = tableDesc.getProperties();
  properties.put(AccumuloSerde.COLUMN_MAPPINGS, props.getProperty(AccumuloSerde.COLUMN_MAPPINGS));
  properties.put(AccumuloSerde.TABLE_NAME, props.getProperty(AccumuloSerde.TABLE_NAME));
  String useIterators = props.getProperty(AccumuloSerde.NO_ITERATOR_PUSHDOWN);
  if (useIterators != null) {
    properties.put(AccumuloSerde.NO_ITERATOR_PUSHDOWN, useIterators);
  }
}
Example #10
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 5 votes |
/**
 *
 * @param desc table description
 * @param jobProps
 */
@Override
public void configureTableJobProperties(TableDesc desc, Map<String, String> jobProps) {
  Properties tblProperties = desc.getProperties();
  jobProps.put(AccumuloSerde.COLUMN_MAPPINGS, tblProperties.getProperty(AccumuloSerde.COLUMN_MAPPINGS));
  String tableName = tblProperties.getProperty(AccumuloSerde.TABLE_NAME);
  jobProps.put(AccumuloSerde.TABLE_NAME, tableName);
  String useIterators = tblProperties.getProperty(AccumuloSerde.NO_ITERATOR_PUSHDOWN);
  if (useIterators != null) {
    jobProps.put(AccumuloSerde.NO_ITERATOR_PUSHDOWN, useIterators);
  }
}
Example #11
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0 | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  Properties tableProperties = tableDesc.getProperties();
  new KafkaBackedTableProperties().initialize(tableProperties, jobProperties, tableDesc);
}
Example #12
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  Properties tableProperties = tableDesc.getProperties();
  new KafkaBackedTableProperties().initialize(tableProperties, jobProperties, tableDesc);
}
Example #13
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 5 votes |
private void useExplicitThroughputIfRequired(Map<String, String> jobProperties, TableDesc tableDesc) {
  String userRequiredReadThroughput =
      tableDesc.getProperties().getProperty(DynamoDBConstants.READ_THROUGHPUT);
  if (userRequiredReadThroughput != null) {
    jobProperties.put(DynamoDBConstants.READ_THROUGHPUT, userRequiredReadThroughput);
  }

  String userRequiredWriteThroughput =
      tableDesc.getProperties().getProperty(DynamoDBConstants.WRITE_THROUGHPUT);
  if (userRequiredWriteThroughput != null) {
    jobProperties.put(DynamoDBConstants.WRITE_THROUGHPUT, userRequiredWriteThroughput);
  }
}
Example #14
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 5 votes |
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  Map<String, String> jobProperties = new HashMap<>();
  configureTableJobProperties(tableDesc, jobProperties);
  for (Entry<String, String> entry : jobProperties.entrySet()) {
    jobConf.set(entry.getKey(), entry.getValue());
  }
}
Example #15
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
  //TODO: implement for serialization to Accumulo
}
Example #16
Source File: SolrStorageHandler.java From hive-solr with MIT License | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tbl, Map<String, String> jobProperties) {
  final Properties properties = tbl.getProperties();
  // copy the table properties into the runtime JobConf
  Conf.copyProperties(properties, jobProperties);
}
Example #17
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  DynamoDBClient client =
      new DynamoDBClient(conf, tableDesc.getProperties().getProperty(DynamoDBConstants.REGION));

  try {
    String tableName = HiveDynamoDBUtil.getDynamoDBTableName(tableDesc.getProperties()
        .getProperty(DynamoDBConstants.TABLE_NAME), tableDesc.getTableName());
    TableDescription description = client.describeTable(tableName);
    Double averageItemSize = DynamoDBUtil.calculateAverageItemSize(description);
    log.info("Average item size: " + averageItemSize);

    String endpoint = conf.get(DynamoDBConstants.ENDPOINT);
    if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT))) {
      endpoint = tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT);
    }

    if (!Strings.isNullOrEmpty(endpoint)) {
      jobProperties.put(DynamoDBConstants.ENDPOINT, endpoint);
    }

    if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.REGION))) {
      jobProperties.put(DynamoDBConstants.REGION,
          tableDesc.getProperties().getProperty(DynamoDBConstants.REGION));
    }

    jobProperties.put(DynamoDBConstants.OUTPUT_TABLE_NAME, tableName);
    jobProperties.put(DynamoDBConstants.INPUT_TABLE_NAME, tableName);
    jobProperties.put(DynamoDBConstants.TABLE_NAME, tableName);

    Map<String, String> hiveToDynamoDBSchemaMapping = HiveDynamoDBUtil
        .getHiveToDynamoDBMapping(tableDesc.getProperties().getProperty(DynamoDBConstants
            .DYNAMODB_COLUMN_MAPPING));

    // Column map can be null if only full backup is being used
    if (hiveToDynamoDBSchemaMapping != null) {
      jobProperties.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, HiveDynamoDBUtil
          .toJsonString(hiveToDynamoDBSchemaMapping));
    }

    Map<String, String> hiveToDynamoDBTypeMapping = HiveDynamoDBUtil
        .getHiveToDynamoDBMapping(tableDesc.getProperties().getProperty(DynamoDBConstants
            .DYNAMODB_TYPE_MAPPING));

    if (hiveToDynamoDBSchemaMapping != null) {
      jobProperties.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, HiveDynamoDBUtil
          .toJsonString(hiveToDynamoDBTypeMapping));
    }

    boolean hiveToDynamoDBNullSerialization = Boolean
        .parseBoolean(tableDesc.getProperties().getProperty(DynamoDBConstants.DYNAMODB_NULL_SERIALIZATION));
    jobProperties.put(DynamoDBConstants.DYNAMODB_NULL_SERIALIZATION,
        Boolean.toString(hiveToDynamoDBNullSerialization));

    if (tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT) != null) {
      jobProperties.put(DynamoDBConstants.THROUGHPUT_READ_PERCENT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT));
    }

    if (tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT) != null) {
      jobProperties.put(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT));
    }

    if (description.getBillingModeSummary() == null
        || description.getBillingModeSummary().getBillingMode()
            .equals(DynamoDBConstants.BILLING_MODE_PROVISIONED)) {
      useExplicitThroughputIfRequired(jobProperties, tableDesc);
    } else {
      // If not specified at the table level, set default value
      jobProperties.put(DynamoDBConstants.READ_THROUGHPUT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.READ_THROUGHPUT,
              DynamoDBConstants.DEFAULT_CAPACITY_FOR_ON_DEMAND.toString()));
      jobProperties.put(DynamoDBConstants.WRITE_THROUGHPUT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.WRITE_THROUGHPUT,
              DynamoDBConstants.DEFAULT_CAPACITY_FOR_ON_DEMAND.toString()));
    }

    jobProperties.put(DynamoDBConstants.ITEM_COUNT, description.getItemCount().toString());
    jobProperties.put(DynamoDBConstants.TABLE_SIZE_BYTES, description.getTableSizeBytes().toString());
    jobProperties.put(DynamoDBConstants.AVG_ITEM_SIZE, averageItemSize.toString());

    log.info("Average item size: " + averageItemSize);
    log.info("Item count: " + description.getItemCount());
    log.info("Table size: " + description.getTableSizeBytes());
    log.info("Read throughput: " + jobProperties.get(DynamoDBConstants.READ_THROUGHPUT));
    log.info("Write throughput: " + jobProperties.get(DynamoDBConstants.WRITE_THROUGHPUT));
  } finally {
    client.close();
  }
}
Example #18
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureTableJobProperties(tableDesc, jobProperties);
}
Example #19
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureTableJobProperties(tableDesc, jobProperties);
}
Example #20
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #21
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #22
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #23
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
@Deprecated
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  throw new UnsupportedOperationException();
}
Example #24
Source File: TestHoodieCombineHiveInputFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
@Disabled
public void testHoodieRealtimeCombineHoodieInputFormat() throws Exception {
  Configuration conf = new Configuration();
  // initial commit
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
  HoodieTestUtils.init(hadoopConf, tempDir.toAbsolutePath().toString(), HoodieTableType.MERGE_ON_READ);
  String commitTime = "100";
  final int numRecords = 1000;
  // Create 3 parquet files with 1000 records each
  File partitionDir = InputFormatTestUtil.prepareParquetTable(tempDir, schema, 3, numRecords, commitTime);
  InputFormatTestUtil.commit(tempDir, commitTime);

  // insert 1000 update records to log file 0
  String newCommitTime = "101";
  HoodieLogFormat.Writer writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 1
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 2
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid2", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();

  TableDesc tblDesc = Utilities.defaultTd;
  // Set the input format
  tblDesc.setInputFileFormatClass(HoodieCombineHiveInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path(tempDir.toAbsolutePath().toString()), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path(tempDir.toAbsolutePath().toString());
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  jobConf = new JobConf(conf);
  // Add the paths
  FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
  jobConf.set(HAS_MAP_WORK, "true");
  // The following config tells Hive to choose ExecMapper to read the MAP_WORK
  jobConf.set(MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
  // setting the split size to be 3 to create one split for 3 file groups
  jobConf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, "3");

  HoodieCombineHiveInputFormat combineHiveInputFormat = new HoodieCombineHiveInputFormat();
  String tripsHiveColumnTypes = "double,string,string,string,double,double,double,double,double";
  InputFormatTestUtil.setPropsForInputFormat(jobConf, schema, tripsHiveColumnTypes);
  InputSplit[] splits = combineHiveInputFormat.getSplits(jobConf, 1);
  // Since the SPLIT_SIZE is 3, we should create only 1 split with all 3 file groups
  assertEquals(1, splits.length);
  RecordReader<NullWritable, ArrayWritable> recordReader =
      combineHiveInputFormat.getRecordReader(splits[0], jobConf, null);
  NullWritable nullWritable = recordReader.createKey();
  ArrayWritable arrayWritable = recordReader.createValue();
  int counter = 0;
  while (recordReader.next(nullWritable, arrayWritable)) {
    // read over all the splits
    counter++;
  }
  // should read out 3 splits, each for file0, file1, file2 containing 1000 records each
  assertEquals(3000, counter);
}
Example #25
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  init(tableDesc, false);
  copyToJobProperties(jobProperties, tableDesc.getProperties());
  setUserProviderIfNotSet(jobProperties);
}
Example #26
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  init(tableDesc, true);
  copyToJobProperties(jobProperties, tableDesc.getProperties());
  setUserProviderIfNotSet(jobProperties);
}
Example #27
Source File: LWStorageHandler.java From hive-solr with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  setProperties(tableDesc.getProperties(), jobProperties);
}
Example #28
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #29
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #30
Source File: CassandraStorageHandler.java From Hive-Cassandra with Apache License 2.0 | 4 votes |
public void configureOutputJobProperties(TableDesc inTableDesc, Map<String, String> inStringStringMap) {
  configureTableJobProperties(inTableDesc, inStringStringMap);
}