org.apache.hadoop.hive.ql.plan.TableDesc Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.plan.TableDesc.
Each example notes its originating project, source file, and license.
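Most of the handlers below follow the same basic pattern: pull the table-level Properties off the TableDesc and copy the keys the MapReduce job will need into the per-job property map that Hive passes to configureInputJobProperties / configureOutputJobProperties. The sketch below distills that pattern; it is only an illustration, and the "my.handler." property prefix and the class name are hypothetical, not taken from any of the projects quoted here.

import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.hive.ql.plan.TableDesc;

public class TableDescPropertyCopier {

  // Copy every table property in a (hypothetical) handler namespace, plus the
  // Hive table name, from the TableDesc into the job property map.
  public static void copyHandlerProperties(TableDesc tableDesc,
                                           Map<String, String> jobProperties) {
    Properties tblProps = tableDesc.getProperties();
    for (String key : tblProps.stringPropertyNames()) {
      if (key.startsWith("my.handler.")) {
        jobProperties.put(key, tblProps.getProperty(key));
      }
    }
    // the Hive table name is always available directly on the descriptor
    jobProperties.put("my.handler.hive.table.name", tableDesc.getTableName());
  }
}

A storage handler would typically call a helper like this from both configureInputJobProperties and configureOutputJobProperties, much as the JDBC, Kudu, and Solr handlers below do with their own equivalents.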
Example #1
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 6 votes |
private void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("tabelDesc: " + tableDesc);
    LOG.debug("jobProperties: " + jobProperties);
  }

  String tblName = tableDesc.getTableName();
  Properties tblProps = tableDesc.getProperties();
  String columnNames = tblProps.getProperty(Constants.LIST_COLUMNS);
  jobProperties.put(DBConfiguration.INPUT_CLASS_PROPERTY, DbRecordWritable.class.getName());
  jobProperties.put(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tblName);
  jobProperties.put(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tblName);
  jobProperties.put(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, columnNames);
  jobProperties.put(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, columnNames);

  for (String key : tblProps.stringPropertyNames()) {
    if (key.startsWith("mapred.jdbc.")) {
      String value = tblProps.getProperty(key);
      jobProperties.put(key, value);
    }
  }
}
Example #2
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
private void init(TableDesc tableDesc, boolean read) {
  Configuration cfg = getConf();
  // NB: we can't just merge the table properties in, we need to save them
  // per input/output otherwise clashes occur which confuse Hive
  Settings settings = HadoopSettingsManager.loadFrom(cfg);
  //settings.setProperty((read ? HiveConstants.INPUT_TBL_PROPERTIES : HiveConstants.OUTPUT_TBL_PROPERTIES), IOUtils.propsToString(tableDesc.getProperties()));
  if (read) {
    // no generic setting
  } else {
    // replace the default committer when using the old API
    HadoopCfgUtils.setOutputCommitterClass(cfg, EsOutputFormat.EsOutputCommitter.class.getName());
  }

  Assert.hasText(tableDesc.getProperties().getProperty(TABLE_LOCATION), String.format(
      "no table location [%s] declared by Hive resulting in abnormal execution;", TABLE_LOCATION));
}
Example #3
Source File: SolrStorageHandler.java From hive-solr with MIT License | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tbl, Map<String, String> jobProperties) {
  final Properties properties = tbl.getProperties();
  // copy the table properties into the runtime JobConf
  Conf.copyProperties(properties, jobProperties);
}
Example #4
Source File: TestAzureTableHiveStorageHandler.java From azure-tables-hadoop with Apache License 2.0 | 5 votes |
@Test
public void testConfigureInputProperties() {
  AzureTableHiveStorageHandler handler = new AzureTableHiveStorageHandler();
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  tableDesc.getProperties().put(Keys.TABLE_NAME.getKey(), "t");
  tableDesc.getProperties().put(Keys.ACCOUNT_URI.getKey(), "http://fakeUri");
  tableDesc.getProperties().put(Keys.STORAGE_KEY.getKey(), "fakeKey");
  Map<String, String> jobProperties = new HashMap<String, String>();
  handler.configureInputJobProperties(tableDesc, jobProperties);
  assertEquals("t", jobProperties.get(Keys.TABLE_NAME.getKey()));
  assertEquals("http://fakeUri", jobProperties.get(Keys.ACCOUNT_URI.getKey()));
  assertNull(jobProperties.get(Keys.PARTITIONER_CLASS.getKey()));
}
Example #5
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  try {
    configureTableJobProperties(tableDesc, jobProperties, false);
  } catch (Exception e) {
    Log.error(e);
    System.exit(1);
  }
}
Example #6
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  try {
    configureTableJobProperties(tableDesc, jobProperties, true);
  } catch (Exception e) {
    Log.error(e);
    System.exit(1);
  }
}
Example #7
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
public void configureTableJobProperties(TableDesc tableDesc,
    Map<String, String> jobProperties, boolean isInputJob) throws Exception {
  Properties tableProperties = tableDesc.getProperties();
  String tableName = null;
  String connStr = tableProperties.getProperty(MRConstants.SPLICE_JDBC_STR);
  if (connStr == null)
    throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_JDBC_STR + "'?");
  // TODO JL
  if (sqlUtil == null)
    sqlUtil = SMSQLUtil.getInstance(connStr);
  if (isInputJob) {
    tableName = tableProperties.getProperty(MRConstants.SPLICE_TABLE_NAME);
    if (tableName == null)
      throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_TABLE_NAME + "'?");
  } else {
    tableName = tableProperties.getProperty(MRConstants.SPLICE_TABLE_NAME);
    if (tableName == null)
      throw new Exception("Error: wrong param. Did you mean '" + MRConstants.SPLICE_TABLE_NAME + "'?");
  }

  tableName = tableName.trim();

  if (parentConn == null) {
    parentTxnId = startWriteJobParentTxn(connStr, tableName);
  }
  jobProperties.put(MRConstants.SPLICE_TRANSACTION_ID, parentTxnId);
  jobProperties.put(MRConstants.SPLICE_TABLE_NAME, tableName);
  jobProperties.put(MRConstants.SPLICE_JDBC_STR, connStr);
}
Example #8
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  if (log.isDebugEnabled()) {
    log.debug("Configuring job credentials for Elasticsearch");
  }
  Settings settings = new CompositeSettings(Arrays.asList(
      HadoopSettingsManager.loadFrom(tableDesc.getProperties()),
      HadoopSettingsManager.loadFrom(jobConf)
  ));
  InitializationUtils.setUserProviderIfNotSet(settings, HadoopUserProvider.class, log);
  UserProvider userProvider = UserProvider.create(settings);
  if (userProvider.isEsKerberosEnabled()) {
    User user = userProvider.getUser();
    ClusterInfo clusterInfo = settings.getClusterInfoOrNull();
    RestClient bootstrap = new RestClient(settings);
    try {
      // first get ES main action info if it's missing
      if (clusterInfo == null) {
        clusterInfo = bootstrap.mainInfo();
      }
      // Add the token to the job
      TokenUtil.addTokenForJobConf(bootstrap, clusterInfo.getClusterName(), user, jobConf);
    } catch (EsHadoopException ex) {
      throw new EsHadoopIllegalArgumentException(String.format("Cannot detect ES version - "
          + "typically this happens if the network/Elasticsearch cluster is not accessible or when targeting "
          + "a WAN/Cloud instance without the proper setting '%s'", ConfigurationOptions.ES_NODES_WAN_ONLY), ex);
    } finally {
      bootstrap.close();
    }
  } else {
    if (log.isDebugEnabled()) {
      log.debug("Ignoring Elasticsearch credentials since Kerberos Auth is not enabled.");
    }
  }
}
Example #9
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> properties) {
  Properties props = tableDesc.getProperties();
  properties.put(AccumuloSerde.COLUMN_MAPPINGS, props.getProperty(AccumuloSerde.COLUMN_MAPPINGS));
  properties.put(AccumuloSerde.TABLE_NAME, props.getProperty(AccumuloSerde.TABLE_NAME));
  String useIterators = props.getProperty(AccumuloSerde.NO_ITERATOR_PUSHDOWN);
  if (useIterators != null) {
    properties.put(AccumuloSerde.NO_ITERATOR_PUSHDOWN, useIterators);
  }
}
Example #10
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 5 votes |
/**
 *
 * @param desc table description
 * @param jobProps
 */
@Override
public void configureTableJobProperties(TableDesc desc, Map<String, String> jobProps) {
  Properties tblProperties = desc.getProperties();
  jobProps.put(AccumuloSerde.COLUMN_MAPPINGS, tblProperties.getProperty(AccumuloSerde.COLUMN_MAPPINGS));
  String tableName = tblProperties.getProperty(AccumuloSerde.TABLE_NAME);
  jobProps.put(AccumuloSerde.TABLE_NAME, tableName);
  String useIterators = tblProperties.getProperty(AccumuloSerde.NO_ITERATOR_PUSHDOWN);
  if (useIterators != null) {
    jobProps.put(AccumuloSerde.NO_ITERATOR_PUSHDOWN, useIterators);
  }
}
Example #11
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0 | 5 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  Properties tableProperties = tableDesc.getProperties();
  new KafkaBackedTableProperties().initialize(tableProperties, jobProperties, tableDesc);
}
Example #12
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0 | 5 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  Properties tableProperties = tableDesc.getProperties();
  new KafkaBackedTableProperties().initialize(tableProperties, jobProperties, tableDesc);
}
Example #13
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 5 votes |
private void useExplicitThroughputIfRequired(Map<String, String> jobProperties, TableDesc tableDesc) {
  String userRequiredReadThroughput =
      tableDesc.getProperties().getProperty(DynamoDBConstants.READ_THROUGHPUT);
  if (userRequiredReadThroughput != null) {
    jobProperties.put(DynamoDBConstants.READ_THROUGHPUT, userRequiredReadThroughput);
  }

  String userRequiredWriteThroughput =
      tableDesc.getProperties().getProperty(DynamoDBConstants.WRITE_THROUGHPUT);
  if (userRequiredWriteThroughput != null) {
    jobProperties.put(DynamoDBConstants.WRITE_THROUGHPUT, userRequiredWriteThroughput);
  }
}
Example #14
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 5 votes |
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  Map<String, String> jobProperties = new HashMap<>();
  configureTableJobProperties(tableDesc, jobProperties);
  for (Entry<String, String> entry : jobProperties.entrySet()) {
    jobConf.set(entry.getKey(), entry.getValue());
  }
}
Example #15
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
  //TODO: implement for serialization to Accumulo
}
Example #16
Source File: SolrStorageHandler.java From hive-solr with MIT License | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tbl, Map<String, String> jobProperties) {
  final Properties properties = tbl.getProperties();
  // copy the table properties into the runtime JobConf
  Conf.copyProperties(properties, jobProperties);
}
Example #17
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  DynamoDBClient client =
      new DynamoDBClient(conf, tableDesc.getProperties().getProperty(DynamoDBConstants.REGION));

  try {
    String tableName = HiveDynamoDBUtil.getDynamoDBTableName(tableDesc.getProperties()
        .getProperty(DynamoDBConstants.TABLE_NAME), tableDesc.getTableName());
    TableDescription description = client.describeTable(tableName);
    Double averageItemSize = DynamoDBUtil.calculateAverageItemSize(description);
    log.info("Average item size: " + averageItemSize);

    String endpoint = conf.get(DynamoDBConstants.ENDPOINT);
    if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT))) {
      endpoint = tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT);
    }

    if (!Strings.isNullOrEmpty(endpoint)) {
      jobProperties.put(DynamoDBConstants.ENDPOINT, endpoint);
    }

    if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.REGION))) {
      jobProperties.put(DynamoDBConstants.REGION,
          tableDesc.getProperties().getProperty(DynamoDBConstants.REGION));
    }

    jobProperties.put(DynamoDBConstants.OUTPUT_TABLE_NAME, tableName);
    jobProperties.put(DynamoDBConstants.INPUT_TABLE_NAME, tableName);
    jobProperties.put(DynamoDBConstants.TABLE_NAME, tableName);

    Map<String, String> hiveToDynamoDBSchemaMapping = HiveDynamoDBUtil
        .getHiveToDynamoDBMapping(tableDesc.getProperties().getProperty(DynamoDBConstants
            .DYNAMODB_COLUMN_MAPPING));

    // Column map can be null if only full backup is being used
    if (hiveToDynamoDBSchemaMapping != null) {
      jobProperties.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, HiveDynamoDBUtil
          .toJsonString(hiveToDynamoDBSchemaMapping));
    }

    Map<String, String> hiveToDynamoDBTypeMapping = HiveDynamoDBUtil
        .getHiveToDynamoDBMapping(tableDesc.getProperties().getProperty(DynamoDBConstants
            .DYNAMODB_TYPE_MAPPING));

    if (hiveToDynamoDBSchemaMapping != null) {
      jobProperties.put(DynamoDBConstants.DYNAMODB_TYPE_MAPPING, HiveDynamoDBUtil
          .toJsonString(hiveToDynamoDBTypeMapping));
    }

    boolean hiveToDynamoDBNullSerialization = Boolean
        .parseBoolean(tableDesc.getProperties().getProperty(DynamoDBConstants.DYNAMODB_NULL_SERIALIZATION));
    jobProperties.put(DynamoDBConstants.DYNAMODB_NULL_SERIALIZATION,
        Boolean.toString(hiveToDynamoDBNullSerialization));

    if (tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT) != null) {
      jobProperties.put(DynamoDBConstants.THROUGHPUT_READ_PERCENT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT));
    }

    if (tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT) != null) {
      jobProperties.put(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT));
    }

    if (description.getBillingModeSummary() == null
        || description.getBillingModeSummary().getBillingMode()
            .equals(DynamoDBConstants.BILLING_MODE_PROVISIONED)) {
      useExplicitThroughputIfRequired(jobProperties, tableDesc);
    } else {
      // If not specified at the table level, set default value
      jobProperties.put(DynamoDBConstants.READ_THROUGHPUT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.READ_THROUGHPUT,
              DynamoDBConstants.DEFAULT_CAPACITY_FOR_ON_DEMAND.toString()));
      jobProperties.put(DynamoDBConstants.WRITE_THROUGHPUT, tableDesc.getProperties()
          .getProperty(DynamoDBConstants.WRITE_THROUGHPUT,
              DynamoDBConstants.DEFAULT_CAPACITY_FOR_ON_DEMAND.toString()));
    }

    jobProperties.put(DynamoDBConstants.ITEM_COUNT, description.getItemCount().toString());
    jobProperties.put(DynamoDBConstants.TABLE_SIZE_BYTES, description.getTableSizeBytes().toString());
    jobProperties.put(DynamoDBConstants.AVG_ITEM_SIZE, averageItemSize.toString());

    log.info("Average item size: " + averageItemSize);
    log.info("Item count: " + description.getItemCount());
    log.info("Table size: " + description.getTableSizeBytes());
    log.info("Read throughput: " + jobProperties.get(DynamoDBConstants.READ_THROUGHPUT));
    log.info("Write throughput: " + jobProperties.get(DynamoDBConstants.WRITE_THROUGHPUT));
  } finally {
    client.close();
  }
}
Example #18
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureTableJobProperties(tableDesc, jobProperties);
}
Example #19
Source File: DynamoDBStorageHandler.java From emr-dynamodb-connector with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureTableJobProperties(tableDesc, jobProperties);
}
Example #20
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #21
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #22
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #23
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
@Deprecated
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  throw new UnsupportedOperationException();
}
Example #24
Source File: TestHoodieCombineHiveInputFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
@Disabled
public void testHoodieRealtimeCombineHoodieInputFormat() throws Exception {
  Configuration conf = new Configuration();
  // initial commit
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
  HoodieTestUtils.init(hadoopConf, tempDir.toAbsolutePath().toString(), HoodieTableType.MERGE_ON_READ);
  String commitTime = "100";
  final int numRecords = 1000;
  // Create 3 parquet files with 1000 records each
  File partitionDir = InputFormatTestUtil.prepareParquetTable(tempDir, schema, 3, numRecords, commitTime);
  InputFormatTestUtil.commit(tempDir, commitTime);

  // insert 1000 update records to log file 0
  String newCommitTime = "101";
  HoodieLogFormat.Writer writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 1
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 2
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid2", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();

  TableDesc tblDesc = Utilities.defaultTd;
  // Set the input format
  tblDesc.setInputFileFormatClass(HoodieCombineHiveInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path(tempDir.toAbsolutePath().toString()), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path(tempDir.toAbsolutePath().toString());
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  jobConf = new JobConf(conf);
  // Add the paths
  FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
  jobConf.set(HAS_MAP_WORK, "true");
  // The following config tells Hive to choose ExecMapper to read the MAP_WORK
  jobConf.set(MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
  // setting the split size to be 3 to create one split for 3 file groups
  jobConf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, "3");

  HoodieCombineHiveInputFormat combineHiveInputFormat = new HoodieCombineHiveInputFormat();
  String tripsHiveColumnTypes = "double,string,string,string,double,double,double,double,double";
  InputFormatTestUtil.setPropsForInputFormat(jobConf, schema, tripsHiveColumnTypes);
  InputSplit[] splits = combineHiveInputFormat.getSplits(jobConf, 1);
  // Since the SPLIT_SIZE is 3, we should create only 1 split with all 3 file groups
  assertEquals(1, splits.length);
  RecordReader<NullWritable, ArrayWritable> recordReader =
      combineHiveInputFormat.getRecordReader(splits[0], jobConf, null);
  NullWritable nullWritable = recordReader.createKey();
  ArrayWritable arrayWritable = recordReader.createValue();
  int counter = 0;
  while (recordReader.next(nullWritable, arrayWritable)) {
    // read over all the splits
    counter++;
  }
  // should read out 3 splits, each for file0, file1, file2 containing 1000 records each
  assertEquals(3000, counter);
}
Example #25
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  init(tableDesc, false);
  copyToJobProperties(jobProperties, tableDesc.getProperties());
  setUserProviderIfNotSet(jobProperties);
}
Example #26
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  init(tableDesc, true);
  copyToJobProperties(jobProperties, tableDesc.getProperties());
  setUserProviderIfNotSet(jobProperties);
}
Example #27
Source File: LWStorageHandler.java From hive-solr with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  setProperties(tableDesc.getProperties(), jobProperties);
}
Example #28
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0 | 4 votes |
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #29
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0 | 4 votes |
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  configureJobProperties(tableDesc, jobProperties);
}
Example #30
Source File: CassandraStorageHandler.java From Hive-Cassandra with Apache License 2.0 | 4 votes |
public void configureOutputJobProperties(TableDesc inTableDesc, Map<String, String> inStringStringMap) {
  configureTableJobProperties(inTableDesc, inStringStringMap);
}