org.apache.hive.hcatalog.data.HCatRecord Java Examples

Source File: FastIndexMapper.java From ES-Fastloader with Apache License 2.0

6 votes

/**
 * Assigns the incoming record to a shard number and emits it keyed by that number.
 *
 * <p>If the task configuration provides no key columns, the shard is chosen at random in
 * {@code [0, reducerNum)}; otherwise it is derived from the record's key value through
 * {@code CommonUtils.getShardId}.
 *
 * @param key     input key (not used)
 * @param value   input record; cast to {@code DefaultHCatRecord}
 * @param context job context that receives the (shard number, record) pair
 */
@Override
protected void map(Object key, HCatRecord value, Context context) throws IOException, InterruptedException {
    DefaultHCatRecord hCatRecord = (DefaultHCatRecord) value;
    List<String> keyList = taskConfig.getKeyList();

    final int shardNo;
    if (keyList != null && !keyList.isEmpty()) {
        shardNo = CommonUtils.getShardId(getKeyValue(keyList, hCatRecord), templateConfig.getReducerNum());
    } else {
        // no key columns configured: spread records randomly across the reducers
        shardNo = (int) (Math.random() * templateConfig.getReducerNum());
    }

    // the number of shards is the same as the number of reducers
    context.write(new IntWritable(shardNo), hCatRecord);
}

Source File: HCatalogIO.java From beam with Apache License 2.0

6 votes

/**
 * Expands this read spec into a pipeline fragment producing {@code HCatRecord}s.
 *
 * <p>Without a polling interval the read goes through a {@code BoundedHCatalogSource}, and a
 * termination condition is rejected. With a polling interval the spec is fed to a
 * {@code Watch} transform driven by {@code PartitionPollerFn}, whose output is read by
 * {@code PartitionReaderFn}.
 *
 * @param input the pipeline start
 * @return the records read from the configured table
 */
@Override
@SuppressWarnings("deprecation")
public PCollection<HCatRecord> expand(PBegin input) {
  checkArgument(getTable() != null, "withTable() is required");
  checkArgument(getConfigProperties() != null, "withConfigProperties() is required");

  if (getPollingInterval() == null) {
    // Treat as Bounded
    checkArgument(
        getTerminationCondition() == null,
        "withTerminationCondition() is not required when using in bounded reads mode");
    return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedHCatalogSource(this)));
  }

  Watch.Growth<Read, Integer, Integer> partitionWatcher =
      Watch.growthOf(new PartitionPollerFn()).withPollInterval(getPollingInterval());
  if (getTerminationCondition() != null) {
    partitionWatcher = partitionWatcher.withTerminationPerInput(getTerminationCondition());
  }
  return input
      .apply("ConvertToReadRequest", Create.of(this))
      .apply("WatchForNewPartitions", partitionWatcher)
      .apply("PartitionReader", ParDo.of(new PartitionReaderFn(getConfigProperties())));
}

Source File: IIDistinctColumnsMapper.java From Kylin with Apache License 2.0

6 votes

/**
 * Emits one (column index, value bytes) pair for every non-null column of the record.
 *
 * <p>Columns {@code 0 .. columnSize-1} are looked up by name through the schema; null
 * values are skipped.
 *
 * @param key     input key (not used)
 * @param record  the record whose columns are emitted
 * @param context job context that receives the pairs
 */
@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {

    for (short columnIndex = 0; columnIndex < columnSize; columnIndex++) {
        outputKey.set(columnIndex);
        HCatFieldSchema column = schema.get(columnIndex);
        Object columnValue = record.get(column.getName(), schema);
        if (columnValue != null) {
            byte[] encoded = Bytes.toBytes(columnValue.toString());
            outputValue.set(encoded, 0, encoded.length);
            context.write(outputKey, outputValue);
        }
    }

}

Source File: ColumnCardinalityMapper.java From Kylin with Apache License 2.0

6 votes

/**
 * Feeds every column value of the record into that column's counter obtained from
 * {@code getHllc(m)}.
 *
 * <p>A null value is replaced by the literal string {@code "NULL"} before it is added, so
 * every column contributes exactly one value per record. For the first five records, the
 * values of the first ten columns are also printed to standard output.
 *
 * @param key     input key (not used)
 * @param value   the record whose column values are counted
 * @param context job context (not used here)
 */
@Override
public void map(T key, HCatRecord value, Context context) throws IOException, InterruptedException {

    for (int m = 0; m < columnSize; m++) {
        HCatFieldSchema field = schema.get(m);
        Object fieldValue = value.get(field.getName(), schema);
        if (fieldValue == null) {
            // null cells are counted under the literal "NULL"
            fieldValue = "NULL";
        }

        if (counter < 5 && m < 10) {
            System.out.println("Get row " + counter + " column '" + field.getName() + "'  value: " + fieldValue);
        }

        // fieldValue cannot be null here (see substitution above), so no further null check is needed
        getHllc(m).add(Bytes.toBytes(fieldValue.toString()));
    }

    counter++;
}

Source File: HCatalogIOTest.java From beam with Apache License 2.0

6 votes

/**
 * Checks, via SourceTestUtils.assertSourcesEqualReferenceSource(..), that the split sources
 * of a {@code BoundedHCatalogSource} are equivalent to the unsplit source.
 */
@Test
@NeedsTestData
public void testSourceEqualsSplits() throws Exception {
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 15;
  final long desiredBundleSizeBytes = numRows * bytesPerRow / numSamples;

  ReaderContext readerContext =
      getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read readSpec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(readerContext)
          .withTable(TEST_TABLE);
  BoundedHCatalogSource wholeSource = new BoundedHCatalogSource(readSpec);

  // splitting with a bundle size of -1 is expected to give back exactly one source
  List<BoundedSource<HCatRecord>> reference = wholeSource.split(-1, OPTIONS);
  assertEquals(1, reference.size());

  List<BoundedSource<HCatRecord>> pieces = wholeSource.split(desiredBundleSizeBytes, OPTIONS);
  assertTrue(pieces.size() >= 1);

  SourceTestUtils.assertSourcesEqualReferenceSource(reference.get(0), pieces, OPTIONS);
}

Source File: TableDataBuilderTest.java From HiveRunner with Apache License 2.0

6 votes

/**
 * A single row added to a table with one data column and one partition column must be
 * stored under the matching partition spec and contain both values.
 */
@Test
public void testPartitionedSimple() {
  HCatTable partitionedTable =
      table().cols(columns(COLUMN_1)).partCols(columns(PARTITION_COLUMN_1));

  Multimap<Map<String, String>, HCatRecord> built =
      new TableDataBuilder(partitionedTable).addRow("value", "partition_value").build();
  assertEquals(1, built.size());

  // look the row up through the partition it is expected to belong to
  Map<String, String> expectedPartition = new HashMap<>();
  expectedPartition.put(PARTITION_COLUMN_1, "partition_value");
  Collection<HCatRecord> partitionRows = built.get(expectedPartition);
  assertEquals(1, partitionRows.size());

  HCatRecord onlyRow = partitionRows.iterator().next();
  assertEquals(Arrays.asList((Object) "value", "partition_value"), onlyRow.getAll());
}

Source File: HCatalogIOTest.java From beam with Apache License 2.0

6 votes

/**
 * Reads every split of the test table with SourceTestUtils.readFromSource(..) and checks
 * that the first field of all records matches the expected records, in any order.
 */
@Test
@NeedsTestData
public void testReadFromSource() throws Exception {
  ReaderContext readerContext =
      getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read readSpec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(readerContext)
          .withTable(TEST_TABLE);

  List<String> firstFields = new ArrayList<>();
  for (int splitId = 0; splitId < readerContext.numSplits(); splitId++) {
    // one bounded source per split id
    BoundedHCatalogSource splitSource = new BoundedHCatalogSource(readSpec.withSplitId(splitId));
    List<HCatRecord> splitRecords = SourceTestUtils.readFromSource(splitSource, OPTIONS);
    for (HCatRecord splitRecord : splitRecords) {
      firstFields.add(splitRecord.get(0).toString());
    }
  }
  assertThat(firstFields, containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT).toArray()));
}

Source File: FactDistinctColumnsMapper.java From Kylin with Apache License 2.0

6 votes

/**
 * Emits one (dictionary column index, value bytes) pair for every non-null dictionary
 * column of the record.
 *
 * <p>Each index in {@code factDictCols} is translated to a flat-table column through the
 * row-key column indexes of {@code intermediateTableDesc}. Any exception raised while
 * processing the record is passed to {@code handleErrorRecord} instead of propagating
 * directly.
 *
 * @param key     input key (not used)
 * @param record  the flat-table record
 * @param context job context that receives the pairs
 */
@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {

    try {

        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
        for (int dictColumn : factDictCols) {
            outputKey.set((short) dictColumn);
            HCatFieldSchema column = schema.get(rowKeyColumnIndexes[dictColumn]);
            Object columnValue = record.get(column.getName(), schema);
            if (columnValue != null) {
                byte[] encoded = Bytes.toBytes(columnValue.toString());
                outputValue.set(encoded, 0, encoded.length);
                context.write(outputKey, outputValue);
            }
        }
    } catch (Exception ex) {
        handleErrorRecord(record, ex);
    }

}

Source File: HCatalogIO.java From beam with Apache License 2.0

6 votes

/**
 * Splits this source into one {@code BoundedHCatalogSource} per split reported by the
 * reader context.
 *
 * <p>A 'desired' split count is computed as {@code ceil(estimatedSize / desiredBundleSize)}
 * when both values are positive (otherwise 1) and handed to {@code getReaderContext}. The
 * number of splits actually returned may differ from that desired count; the actual number
 * is what determines the size of the result.
 *
 * @param desiredBundleSizeBytes preferred size of each bundle, in bytes
 * @param options pipeline options, used to estimate the total size
 * @return one source per actual split
 */
@Override
public List<BoundedSource<HCatRecord>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  final long estimatedSizeBytes = getEstimatedSizeBytes(options);
  final boolean sizesKnown = desiredBundleSizeBytes > 0 && estimatedSizeBytes > 0;
  final int desiredSplitCount =
      sizesKnown ? (int) Math.ceil((double) estimatedSizeBytes / desiredBundleSizeBytes) : 1;

  // the context carries the splits actually produced, which need not match the desired count
  ReaderContext readerContext = getReaderContext(desiredSplitCount);
  LOG.info(
      "Splitting into bundles of {} bytes: "
          + "estimated size {}, desired split count {}, actual split count {}",
      desiredBundleSizeBytes,
      estimatedSizeBytes,
      desiredSplitCount,
      readerContext.numSplits());

  List<BoundedSource<HCatRecord>> sources = new ArrayList<>();
  int splitId = 0;
  while (splitId < readerContext.numSplits()) {
    sources.add(new BoundedHCatalogSource(spec.withContext(readerContext).withSplitId(splitId)));
    splitId++;
  }
  return sources;
}

Source File: HCatalogTestUtils.java From aliyun-maxcompute-data-collectors with Apache License 2.0

6 votes

/**
 * Generates {@code numRecords} records for the given table schema.
 *
 * <p>For record {@code i}, the first column is set to {@code i} and the second to
 * {@code "textfield" + i}. Each extra column generator whose key type is not
 * {@code STATIC_KEY} then fills the next column in order, starting at the third.
 *
 * @param numRecords    number of records to generate
 * @param hCatTblSchema schema supplying the column names
 * @param extraCols     generators for additional column values
 * @return the generated records
 */
private List<HCatRecord> generateHCatRecords(int numRecords,
  HCatSchema hCatTblSchema, ColumnGenerator... extraCols) throws Exception {
  List<HCatRecord> generated = new ArrayList<HCatRecord>();
  List<HCatFieldSchema> columns = hCatTblSchema.getFields();
  int columnCount = columns.size();
  for (int row = 0; row < numRecords; ++row) {
    DefaultHCatRecord record = new DefaultHCatRecord(columnCount);
    record.set(columns.get(0).getName(), hCatTblSchema, row);
    record.set(columns.get(1).getName(), hCatTblSchema, "textfield" + row);

    // extra columns start right after the two fixed ones; static keys take no slot
    int nextColumn = 2;
    for (ColumnGenerator generator : extraCols) {
      if (generator.getKeyType() != KeyType.STATIC_KEY) {
        record.set(columns.get(nextColumn).getName(), hCatTblSchema,
          generator.getHCatValue(row));
        nextColumn++;
      }
    }

    generated.add(record);
  }
  return generated;
}

Source File: TableDataInserter.java From HiveRunner with Apache License 2.0

6 votes

/**
 * Writes the given rows into one partition of the configured table.
 *
 * <p>One writer is obtained for the target entity to prepare the write and later commit it;
 * a second writer, obtained from the prepared context, writes the rows.
 *
 * @param partitionSpec partition to write into
 * @param rows          records to write
 * @throws RuntimeException wrapping any {@code HCatException} raised along the way
 */
private void insert(Map<String, String> partitionSpec, Iterable<HCatRecord> rows) {
  WriteEntity target = new WriteEntity.Builder()
      .withDatabase(databaseName)
      .withTable(tableName)
      .withPartition(partitionSpec)
      .build();

  try {
    HCatWriter coordinator = DataTransferFactory.getHCatWriter(target, config);
    WriterContext writeContext = coordinator.prepareWrite();
    DataTransferFactory.getHCatWriter(writeContext).write(rows.iterator());
    coordinator.commit(writeContext);
  } catch (HCatException e) {
    throw new RuntimeException("An error occurred while inserting data to " + databaseName + "." + tableName, e);
  }
}

Source File: InvertedIndexMapper.java From Kylin with Apache License 2.0

5 votes

/**
 * Copies the record's fields, as strings, into the reusable {@code rec} and emits it keyed
 * by its timestamp.
 *
 * @param key     input key (not used)
 * @param record  the record to copy; null fields are stored as null
 * @param context job context that receives the (timestamp, value) pair
 */
@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {

    rec.reset();
    for (int col = 0; col < fields.size(); col++) {
        Object raw = record.get(col);
        String text = null;
        if (raw != null) {
            text = raw.toString();
        }
        rec.setValueString(col, text);
    }

    outputKey.set(rec.getTimestamp());
    // outputValue's backing bytes array is the same as rec

    context.write(outputKey, outputValue);
}

Source File: TableDataBuilderTest.java From HiveRunner with Apache License 2.0

5 votes

/**
 * A row started with {@code newRow()} and never filled must be built as a single record
 * whose only column is null.
 */
@Test
public void testUnpartitionedEmptyRow() {
  HCatTable singleColumnTable = table().cols(columns(COLUMN_1));

  Multimap<Map<String, String>, HCatRecord> built =
      new TableDataBuilder(singleColumnTable).newRow().build();

  assertEquals(1, built.size());
  HCatRecord onlyRow = built.values().iterator().next();
  assertEquals(Arrays.asList((Object) null), onlyRow.getAll());
}

Source File: PartitionReaderFn.java From beam with Apache License 2.0

5 votes

/**
 * Reads every record of the partition named by the input element and outputs it.
 *
 * <p>The element's key is the read request and its value the index of the partition to
 * read. A reader context is built from both, and each of its splits is read in turn.
 *
 * @param c the process context supplying the element and receiving the records
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  final Read readRequest = c.element().getKey();
  final Integer partitionIndexToRead = c.element().getValue();
  ReaderContext readerContext = getReaderContext(readRequest, partitionIndexToRead);
  for (int splitId = 0; splitId < readerContext.numSplits(); splitId++) {
    HCatReader splitReader = DataTransferFactory.getHCatReader(readerContext, splitId);
    for (Iterator<HCatRecord> records = splitReader.read(); records.hasNext(); ) {
      c.output(records.next());
    }
  }
}

Source File: HCatalogTestUtils.java From aliyun-maxcompute-data-collectors with Apache License 2.0

5 votes

/**
 * Renders the records as text for debugging.
 *
 * <p>Each record starts with a {@code "HCat Record : <n>"} line (numbered from 1), followed
 * by one tab-indented {@code name=value} entry per schema field, each entry followed by
 * blank lines.
 *
 * @param recs   records to dump
 * @param schema schema supplying the field names and used to look values up
 * @return the textual dump
 */
public String hCatRecordDump(List<HCatRecord> recs,
  HCatSchema schema) throws Exception {
  List<String> fieldNames = schema.getFieldNames();
  StringBuilder dump = new StringBuilder(1024);
  int recordNumber = 0;
  for (HCatRecord rec : recs) {
    recordNumber++;
    dump.append("HCat Record : ").append(recordNumber).append('\n');
    for (String fieldName : fieldNames) {
      dump.append('\t').append(fieldName).append('=')
        .append(rec.get(fieldName, schema)).append('\n')
        .append("\n\n");
    }
  }
  return dump.toString();
}

Source File: NetezzaExternalTableHCatExportMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0

5 votes

/**
 * Converts the HCatalog record to a Sqoop record, writes it out, and reports progress to
 * the framework.
 *
 * @param key     input key (not used)
 * @param hcr     the HCatalog record to export
 * @param context job context used to report progress
 */
@Override
public void map(LongWritable key, HCatRecord hcr, Context context)
  throws IOException, InterruptedException {
  writeSqoopRecord(helper.convertToSqoopRecord(hcr));
  context.progress();
}

Source File: HiveTableReader.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Converts a record into an array of strings, one entry per field.
 *
 * <p>A field that is null, or equal to the two-character string {@code \N}, becomes a null
 * entry; every other field is converted with {@code toString()}.
 *
 * @param record the record to convert
 * @return an array of {@code record.size()} entries
 */
public static String[] getRowAsStringArray(HCatRecord record) {
    final int fieldCount = record.size();
    String[] values = new String[fieldCount];
    for (int idx = 0; idx < fieldCount; idx++) {
        Object field = record.get(idx);
        if (field != null && !"\\N".equals(field)) {
            values[idx] = field.toString();
        }
        // otherwise the slot keeps its default null
    }
    return values;
}

Source File: TableDataBuilderTest.java From HiveRunner with Apache License 2.0

5 votes

/**
 * A row added after restricting the builder to {@code COLUMN_1} must be built as a single
 * record holding just that value.
 */
@Test
public void testUnpartitionedWithColumnMask() {
  HCatTable singleColumnTable = table().cols(columns(COLUMN_1));

  Multimap<Map<String, String>, HCatRecord> built =
      new TableDataBuilder(singleColumnTable).withColumns(COLUMN_1).addRow("value").build();

  assertEquals(1, built.size());
  HCatRecord onlyRow = built.values().iterator().next();
  assertEquals(Arrays.asList((Object) "value"), onlyRow.getAll());
}

Source File: TableDataBuilderTest.java From HiveRunner with Apache License 2.0

5 votes

/**
 * Four rows spread over two partition values must be grouped by partition, two rows each,
 * in the order they were added.
 */
@Test
public void testPartitionedMultiplePartitionsAndRows() {
  HCatTable partitionedTable =
      table().cols(columns(COLUMN_1)).partCols(columns(PARTITION_COLUMN_1));

  Multimap<Map<String, String>, HCatRecord> built = new TableDataBuilder(partitionedTable)
      .addRow("value1", "partition_value1")
      .addRow("value2", "partition_value1")
      .addRow("value3", "partition_value2")
      .addRow("value4", "partition_value2")
      .build();

  assertEquals(4, built.size());

  // first partition: value1, value2
  Map<String, String> firstPartition = new HashMap<>();
  firstPartition.put(PARTITION_COLUMN_1, "partition_value1");
  Collection<HCatRecord> firstRows = built.get(firstPartition);
  assertEquals(2, firstRows.size());
  Iterator<HCatRecord> firstIt = firstRows.iterator();
  assertEquals(Arrays.asList((Object) "value1", "partition_value1"), firstIt.next().getAll());
  assertEquals(Arrays.asList((Object) "value2", "partition_value1"), firstIt.next().getAll());

  // second partition: value3, value4
  Map<String, String> secondPartition = new HashMap<>();
  secondPartition.put(PARTITION_COLUMN_1, "partition_value2");
  Collection<HCatRecord> secondRows = built.get(secondPartition);
  assertEquals(2, secondRows.size());
  Iterator<HCatRecord> secondIt = secondRows.iterator();
  assertEquals(Arrays.asList((Object) "value3", "partition_value2"), secondIt.next().getAll());
  assertEquals(Arrays.asList((Object) "value4", "partition_value2"), secondIt.next().getAll());
}

Source File: TableDataBuilder.java From HiveRunner with Apache License 2.0

5 votes

/**
 * Flushes the current row and starts a new one holding a copy of its values.
 *
 * @return this builder
 * @throws IllegalStateException presumably, via {@code checkState}, when there is no current
 *     row to copy — confirm against the {@code checkState} in use
 */
TableDataBuilder copyRow() {
  checkState(row != null, "No previous row to copy.");
  // take the snapshot before flushing the current row
  HCatRecord duplicate = new DefaultHCatRecord(new ArrayList<>(row.getAll()));
  flushRow();
  row = duplicate;
  return this;
}

Source File: HCatalogIOTestUtils.java From beam with Apache License 2.0

5 votes

/**
 * Builds {@code size} records by calling {@code toHCatRecord(i)} for {@code i} from 0 to
 * {@code size - 1}.
 *
 * @param size number of records to build
 * @return the records, in index order
 */
public static List<HCatRecord> buildHCatRecords(int size) {
  List<HCatRecord> records = new ArrayList<>();
  int index = 0;
  while (index < size) {
    records.add(toHCatRecord(index));
    index++;
  }
  return records;
}

Source File: FactDistinctColumnsMapper.java From Kylin with Apache License 2.0

5 votes

/**
 * Logs a record that failed processing and counts it; once the count exceeds
 * {@code BatchConstants.ERROR_RECORD_THRESHOLD} the failure is rethrown.
 *
 * <p>The record and the stack trace are printed to standard error. When rethrowing, an
 * {@code IOException} or {@code RuntimeException} is thrown as is; any other exception is
 * wrapped in a {@code RuntimeException}.
 *
 * @param record the record that caused the failure
 * @param ex     the exception raised while processing it
 * @throws IOException if {@code ex} is an {@code IOException} and the threshold is exceeded
 */
private void handleErrorRecord(HCatRecord record, Exception ex) throws IOException {

    System.err.println("Insane record: " + record.getAll());
    ex.printStackTrace(System.err);

    errorRecordCounter++;
    if (errorRecordCounter <= BatchConstants.ERROR_RECORD_THRESHOLD) {
        // still within the tolerated number of bad records
        return;
    }

    if (ex instanceof IOException) {
        throw (IOException) ex;
    }
    if (ex instanceof RuntimeException) {
        throw (RuntimeException) ex;
    }
    throw new RuntimeException("", ex);
}

Source File: HCatInputFormatBase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Returns the next record, or {@code null} when {@code hasNext} is false.
 *
 * <p>If no record has been fetched yet, {@code fetchNext()} is called first. When field
 * names are configured the HCatRecord is converted with {@code buildFlinkTuple}; otherwise
 * the HCatRecord itself is returned.
 *
 * @param record reusable instance passed on to {@code buildFlinkTuple}
 * @return the next record, or {@code null} if there is none
 * @throws IOException if reading the current value is interrupted; the thread's interrupt
 *     flag is restored before the exception is thrown
 */
@Override
public T nextRecord(T record) throws IOException {
	if (!this.fetched) {
		// first record
		fetchNext();
	}
	if (!this.hasNext) {
		return null;
	}
	try {

		// get next HCatRecord
		HCatRecord v = this.recordReader.getCurrentValue();
		this.fetched = false;

		if (this.fieldNames.length > 0) {
			// return as Flink tuple
			return this.buildFlinkTuple(record, v);

		} else {
			// return as HCatRecord
			return (T) v;
		}

	} catch (InterruptedException e) {
		// keep the interruption visible to callers: wrapping alone would lose the flag
		Thread.currentThread().interrupt();
		throw new IOException("Could not get next record.", e);
	}
}

Source File: HCatalogIO.java From beam with Apache License 2.0

5 votes

/**
 * Returns the current record.
 *
 * @return the current record, never null
 * @throws NoSuchElementException if there is no current record
 */
@Override
public HCatRecord getCurrent() {
  if (current != null) {
    return current;
  }
  throw new NoSuchElementException("Current element is null");
}

Source File: HCatalogIO.java From beam with Apache License 2.0

5 votes

/**
 * Applies a {@code WriteFn} built from this spec to the input records.
 *
 * <p>Both the configuration properties and the table must have been set.
 *
 * @param input the records to write
 * @return a {@code PDone} in the input's pipeline
 */
@Override
public PDone expand(PCollection<HCatRecord> input) {
  // validate required settings before wiring up the write step
  checkArgument(getConfigProperties() != null, "withConfigProperties() is required");
  checkArgument(getTable() != null, "withTable() is required");

  WriteFn writeFn = new WriteFn(this);
  input.apply(ParDo.of(writeFn));
  return PDone.in(input.getPipeline());
}

Source File: HCatalogIOTest.java From beam with Apache License 2.0

5 votes

/**
 * End-to-end test: writes the test records with one pipeline, then reads them back with a
 * second pipeline and checks the first field of every record.
 */
@Test
@NeedsEmptyTestTables
public void testWriteThenReadSuccess() {
  // write phase
  defaultPipeline
      .apply(Create.of(buildHCatRecords(TEST_RECORDS_COUNT)))
      .apply(
          HCatalogIO.write()
              .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
              .withDatabase(TEST_DATABASE)
              .withTable(TEST_TABLE)
              .withPartition(new java.util.HashMap<>())
              .withBatchSize(512L));
  defaultPipeline.run();

  // read phase: keep only the first field of each record, as a string
  PCollection<String> firstFields =
      readAfterWritePipeline
          .apply(
              HCatalogIO.read()
                  .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
                  .withDatabase(TEST_DATABASE)
                  .withTable(TEST_TABLE)
                  .withFilter(TEST_FILTER))
          .apply(
              ParDo.of(
                  new DoFn<HCatRecord, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      HCatRecord element = ctx.element();
                      ctx.output(element.get(0).toString());
                    }
                  }));
  PAssert.that(firstFields).containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}

Source File: HiveTableReader.java From kylin with Apache License 2.0

5 votes

/**
 * Appends the string form of every field of the record to {@code rowValues}.
 *
 * <p>Null fields are appended as null. The list is not cleared first.
 *
 * @param record    the record to read
 * @param rowValues list that receives the values
 * @return the same {@code rowValues} instance
 */
public static List<String> getRowAsList(HCatRecord record, List<String> rowValues) {
    for (Object field : record.getAll()) {
        String text = null;
        if (field != null) {
            text = field.toString();
        }
        rowValues.add(text);
    }
    return rowValues;
}

Source File: HiveTableReader.java From kylin with Apache License 2.0

5 votes

/**
 * Converts a record into an array of strings, one entry per field.
 *
 * <p>A field that is null, or equal to the two-character string {@code \N}, becomes a null
 * entry; every other field is converted with {@code toString()}.
 *
 * @param record the record to convert
 * @return an array of {@code record.size()} entries
 */
public static String[] getRowAsStringArray(HCatRecord record) {
    final int fieldCount = record.size();
    String[] values = new String[fieldCount];
    for (int idx = 0; idx < fieldCount; idx++) {
        Object field = record.get(idx);
        boolean missing = field == null || "\\N".equals(field);
        if (!missing) {
            values[idx] = field.toString();
        }
        // a missing field leaves the slot at its default null
    }
    return values;
}

Source File: HCatInputFormatBase.java From flink with Apache License 2.0

5 votes

/**
 * Returns the next record, or {@code null} when {@code hasNext} is false.
 *
 * <p>If no record has been fetched yet, {@code fetchNext()} is called first. When field
 * names are configured the HCatRecord is converted with {@code buildFlinkTuple}; otherwise
 * the HCatRecord itself is returned.
 *
 * @param record reusable instance passed on to {@code buildFlinkTuple}
 * @return the next record, or {@code null} if there is none
 * @throws IOException if reading the current value is interrupted; the thread's interrupt
 *     flag is restored before the exception is thrown
 */
@Override
public T nextRecord(T record) throws IOException {
	if (!this.fetched) {
		// first record
		fetchNext();
	}
	if (!this.hasNext) {
		return null;
	}
	try {

		// get next HCatRecord
		HCatRecord v = this.recordReader.getCurrentValue();
		this.fetched = false;

		if (this.fieldNames.length > 0) {
			// return as Flink tuple
			return this.buildFlinkTuple(record, v);

		} else {
			// return as HCatRecord
			return (T) v;
		}

	} catch (InterruptedException e) {
		// keep the interruption visible to callers: wrapping alone would lose the flag
		Thread.currentThread().interrupt();
		throw new IOException("Could not get next record.", e);
	}
}

Source File: FactDistinctColumnsMapperTest.java From kylin with Apache License 2.0

5 votes

/**
 * Runs the mapper on a single 11-field record and checks the number of emitted pairs
 * against the count derived from the cube description.
 */
@Test
public void testMapper() throws IOException {
    Configuration configuration = mapDriver.getConfiguration();
    configuration.set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100");
    configuration.set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_1_new_segment");
    configuration.set(BatchConstants.CFG_CUBE_SEGMENT_ID, "198va32a-a33e-4b69-83dd-0bb8b1f8c53b");

    // field values of the single input record, in column order
    String[] fieldValues = {
        "2012-08-16",
        "48027",
        "0",
        "Home & Garden",
        "Cheese & Crackers",
        "Cheese & Crackers",
        "48027",
        "16",
        "10000010",
        "204.28",
        "5"
    };
    HCatRecord value1 = new DefaultHCatRecord(fieldValues.length);
    for (int position = 0; position < fieldValues.length; position++) {
        value1.set(position, fieldValues[position]);
    }
    mapDriver.addInput(new LongWritable(0), value1);

    List<Pair<SelfDefineSortableKey, Text>> result = mapDriver.run();
    int colsNeedDictSize = cubeDesc.getAllColumnsNeedDictionaryBuilt().size();
    int cuboidsCnt = cubeDesc.getAllCuboids().size();
    int rowKeyColumnCount = cubeDesc.getRowkey().getRowKeyColumns().length;

    int expectedOutputs = colsNeedDictSize + (rowKeyColumnCount - colsNeedDictSize) * 2 + cuboidsCnt;
    assertEquals(expectedOutputs, result.size());
}

org.apache.hive.hcatalog.data.HCatRecord Java Examples