org.apache.hadoop.hive.ql.exec.vector.StructColumnVector Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.exec.vector.StructColumnVector. Each example is taken from an open source project; the source file, project, and license are noted above the code.
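Before the project examples, a minimal sketch of the core API may help: a StructColumnVector is essentially an array of child ColumnVectors plus the usual null flags, and a struct cell at row r is written by writing each fields[i] at that same r. The class and method names below are from the Hive vectorization API; the struct&lt;id:bigint, name:string&gt; schema and all values are made up for illustration.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class StructColumnVectorSketch {
    public static void main(String[] args) {
        int size = VectorizedRowBatch.DEFAULT_SIZE;

        // Children for a hypothetical struct<id:bigint, name:string> column.
        LongColumnVector id = new LongColumnVector(size);
        BytesColumnVector name = new BytesColumnVector(size);
        name.initBuffer();

        // The struct vector simply wraps its child vectors.
        StructColumnVector person = new StructColumnVector(size, id, name);

        // Write row 0 by writing each child at the same row index.
        int row = 0;
        id.vector[row] = 42L;
        byte[] bytes = "alice".getBytes(java.nio.charset.StandardCharsets.UTF_8);
        name.setVal(row, bytes, 0, bytes.length);

        // A null struct is flagged on the parent vector itself;
        // the children at that row are simply left unread.
        person.noNulls = false;
        person.isNull[1] = true;

        System.out.println("row 0 id=" + id.vector[0] + " name=" + name.toString(0));
    }
}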
Example #1
Source File: OrcWriter.java From osm2orc with ISC License
@Override
public void process(WayContainer container) {
    DecimalColumnVector lat = (DecimalColumnVector) batch.cols[3];
    DecimalColumnVector lon = (DecimalColumnVector) batch.cols[4];
    ListColumnVector nds = (ListColumnVector) batch.cols[5];

    checkLimit();
    addCommonProperties(container);

    lat.isNull[row] = true;
    lon.isNull[row] = true;
    lat.set(row, (HiveDecimal) null);
    lon.set(row, (HiveDecimal) null);

    Way way = container.getEntity();
    nds.lengths[row] = way.getWayNodes().size();
    nds.childCount += nds.lengths[row];
    nds.child.ensureSize(nds.childCount, nds.offsets[row] != 0);

    for (int j = 0; j < way.getWayNodes().size(); j++) {
        StructColumnVector ndsStruct = (StructColumnVector) nds.child;
        ((LongColumnVector) ndsStruct.fields[0]).vector[(int) nds.offsets[row] + j] =
                way.getWayNodes().get(j).getNodeId();
    }
}
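Example #1 depends on how ORC encodes lists: a ListColumnVector keeps per-row offsets and lengths that index into a single shared child vector, and for an array-of-structs column that child is a StructColumnVector. Below is a condensed, hedged sketch of just that pattern; the array&lt;struct&lt;nodeId:bigint&gt;&gt; schema and node ids are hypothetical, and in the example above offsets[row] is maintained elsewhere in OrcWriter.

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

class ListOfStructsSketch {
    // Writes one array<struct<nodeId:bigint>> cell at the given row.
    static void writeNodeIds(ListColumnVector nds, int row, long[] nodeIds) {
        // The list for this row starts at the current end of the shared child.
        nds.offsets[row] = nds.childCount;
        nds.lengths[row] = nodeIds.length;
        nds.childCount += nodeIds.length;
        nds.child.ensureSize(nds.childCount, true); // true = keep existing child data

        StructColumnVector ndsStruct = (StructColumnVector) nds.child;
        LongColumnVector idField = (LongColumnVector) ndsStruct.fields[0];
        for (int j = 0; j < nodeIds.length; j++) {
            idField.vector[(int) nds.offsets[row] + j] = nodeIds[j];
        }
    }

    public static void main(String[] args) {
        int size = VectorizedRowBatch.DEFAULT_SIZE;
        ListColumnVector nds = new ListColumnVector(size,
                new StructColumnVector(size, new LongColumnVector(size)));
        writeNodeIds(nds, 0, new long[] {101L, 102L, 103L}); // hypothetical node ids
        System.out.println("child count: " + nds.childCount);
    }
}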
Example #2
Source File: HiveORCCopiers.java From dremio-oss with Apache License 2.0
StructCopier(HiveColumnVectorData columnVectorData, int ordinalId,
             StructColumnVector inputVector, StructVector outputVector,
             HiveOperatorContextOptions operatorContextOptions) {
    this.inputVector = inputVector;
    this.outputVector = outputVector;

    int fieldCount = inputVector.fields.length;
    int arrowIdx = 0;
    int childPos = ordinalId + 1; // first child is immediately next to struct vector itself
    for (int idx = 0; idx < fieldCount; ++idx) {
        if (columnVectorData.isColumnVectorIncluded(childPos)) {
            ValueVector arrowElementVector = outputVector.getVectorById(arrowIdx);
            ColumnVector hiveElementVector = inputVector.fields[idx];
            ORCCopier childCopier = createCopier(columnVectorData, childPos,
                    arrowElementVector, hiveElementVector, operatorContextOptions);
            fieldCopiers.add(childCopier);
            arrowIdx++;
        } else {
            fieldCopiers.add(new NoOpCopier(null, null));
        }
        childPos += columnVectorData.getTotalVectorCount(childPos);
    }
}
Example #3
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0

private ColumnVector[] createTransactionalVectors(ColumnVector[] dataVectors) {
    // Hive transactional (ACID) file layout: five bookkeeping columns,
    // then the user row wrapped in a single struct column.
    ColumnVector[] transVectors = new ColumnVector[6];

    transVectors[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    transVectors[1] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    transVectors[2] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    transVectors[3] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    transVectors[4] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

    transVectors[5] = new StructColumnVector(dataVectors.length, dataVectors);

    return transVectors;
}
Example #4
Source File: VectorColumnFiller.java From secor with Apache License 2.0

public void convert(JsonElement value, ColumnVector vect, int row) {
    if (value == null || value.isJsonNull()) {
        vect.noNulls = false;
        vect.isNull[row] = true;
    } else {
        StructColumnVector vector = (StructColumnVector) vect;
        JsonObject obj = value.getAsJsonObject();
        for (int c = 0; c < childrenConverters.length; ++c) {
            JsonElement elem = obj.get(fieldNames.get(c));
            childrenConverters[c].convert(elem, vector.fields[c], row);
        }
    }
}
Example #5
Source File: JsonFieldFiller.java From secor with Apache License 2.0

private static void setStruct(JSONWriter writer, StructColumnVector batch,
                              TypeDescription schema, int row) throws JSONException {
    writer.object();
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> fieldTypes = schema.getChildren();
    for (int i = 0; i < fieldTypes.size(); ++i) {
        writer.key(fieldNames.get(i));
        setValue(writer, batch.fields[i], fieldTypes.get(i), row);
    }
    writer.endObject();
}
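setStruct also illustrates the general read pattern: a struct cell is reassembled by visiting every fields[i] at the same row, after the caller has collapsed the row index to 0 when isRepeating is set (as setValue, shown later in this list, does). Here is a standalone sketch without the JSON machinery, assuming a hypothetical struct&lt;id:bigint, name:string&gt; vector:

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;

class StructReadSketch {
    // Prints one struct cell; assumes fields[0] is id (bigint) and fields[1] is name (string).
    static void printPerson(StructColumnVector person, int row) {
        if (person.isRepeating) {
            row = 0; // a repeating vector stores its single value at index 0
        }
        if (person.noNulls || !person.isNull[row]) {
            long id = ((LongColumnVector) person.fields[0]).vector[row];
            String name = ((BytesColumnVector) person.fields[1]).toString(row);
            System.out.println(id + " -> " + name);
        } else {
            System.out.println("<null struct>");
        }
    }
}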
Example #6
Source File: OrcBatchReader.java From flink with Apache License 2.0

private static void readNonNullStructColumn(Object[] vals, int fieldIdx,
        StructColumnVector structVector, TypeDescription schema, int childCount) {

    List<TypeDescription> childrenTypes = schema.getChildren();
    int numFields = childrenTypes.size();

    // create a batch of Rows to read the structs
    Row[] structs = new Row[childCount];
    // TODO: possible improvement: reuse existing Row objects
    for (int i = 0; i < childCount; i++) {
        structs[i] = new Row(numFields);
    }

    // read struct fields
    // we don't have to handle isRepeating because ORC assumes that it is propagated into the children.
    for (int i = 0; i < numFields; i++) {
        readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
    }

    if (fieldIdx == -1) { // set struct as an object
        System.arraycopy(structs, 0, vals, 0, childCount);
    } else { // set struct as a field of Row
        Row[] rows = (Row[]) vals;
        for (int i = 0; i < childCount; i++) {
            rows[i].setField(fieldIdx, structs[i]);
        }
    }
}
Example #7
Source File: OrcWriter.java From osm2orc with ISC License

@Override
public void process(RelationContainer container) {
    DecimalColumnVector lat = (DecimalColumnVector) batch.cols[3];
    DecimalColumnVector lon = (DecimalColumnVector) batch.cols[4];
    ListColumnVector members = (ListColumnVector) batch.cols[6];

    checkLimit();
    addCommonProperties(container);

    lat.isNull[row] = true;
    lon.isNull[row] = true;
    lat.set(row, (HiveDecimal) null);
    lon.set(row, (HiveDecimal) null);

    Relation relation = container.getEntity();
    members.lengths[row] = relation.getMembers().size();
    members.childCount += members.lengths[row];
    members.child.ensureSize(members.childCount, members.offsets[row] != 0);

    for (int j = 0; j < relation.getMembers().size(); j++) {
        StructColumnVector membersStruct = (StructColumnVector) members.child;
        ((BytesColumnVector) membersStruct.fields[0]).setVal((int) members.offsets[row] + j,
                relation.getMembers().get(j).getMemberType().toString().toLowerCase().getBytes());
        ((LongColumnVector) membersStruct.fields[1]).vector[(int) members.offsets[row] + j] =
                relation.getMembers().get(j).getMemberId();
        ((BytesColumnVector) membersStruct.fields[2]).setVal((int) members.offsets[row] + j,
                relation.getMembers().get(j).getMemberRole().getBytes());
    }
}
Example #8
Source File: HiveORCCopiers.java From dremio-oss with Apache License 2.0

/**
 * Helper method to create {@link ORCCopier}s based on given input, output vector
 * types and projected column ordinals.
 *
 * @param projectedColOrdinals ordinals of the columns that we are interested in reading from the file.
 * @param output
 * @param input
 * @return
 */
public static ORCCopier[] createCopiers(final HiveColumnVectorData columnVectorData,
                                        final List<Integer> projectedColOrdinals,
                                        int[] ordinalIdsFromOrcFile,
                                        final ValueVector[] output,
                                        final VectorizedRowBatch input,
                                        boolean isOriginal,
                                        HiveOperatorContextOptions operatorContextOptions) {
    final int numColumns = output.length;
    final ORCCopier[] copiers = new ORCCopier[numColumns];
    final ColumnVector[] cols = isOriginal ? input.cols :
            ((StructColumnVector) input.cols[HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX]).fields;
    for (int i = 0; i < numColumns; i++) {
        boolean copierCreated = false;
        if (i < projectedColOrdinals.size()) {
            int projectedColOrdinal = projectedColOrdinals.get(i);
            if (projectedColOrdinal < ordinalIdsFromOrcFile.length && projectedColOrdinal < cols.length) {
                int ordinalId = ordinalIdsFromOrcFile[projectedColOrdinal];
                copiers[i] = createCopier(columnVectorData, ordinalId, output[i],
                        cols[projectedColOrdinal], operatorContextOptions);
                copierCreated = true;
            }
        }
        if (!copierCreated) {
            copiers[i] = new NoOpCopier(null, null);
        }
    }
    return copiers;
}
Example #9
Source File: OrcBatchReader.java From Flink-CEPplus with Apache License 2.0

private static void readNonNullStructColumn(Object[] vals, int fieldIdx,
        StructColumnVector structVector, TypeDescription schema, int childCount) {

    List<TypeDescription> childrenTypes = schema.getChildren();
    int numFields = childrenTypes.size();

    // create a batch of Rows to read the structs
    Row[] structs = new Row[childCount];
    // TODO: possible improvement: reuse existing Row objects
    for (int i = 0; i < childCount; i++) {
        structs[i] = new Row(numFields);
    }

    // read struct fields
    // we don't have to handle isRepeating because ORC assumes that it is propagated into the children.
    for (int i = 0; i < numFields; i++) {
        readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
    }

    if (fieldIdx == -1) { // set struct as an object
        System.arraycopy(structs, 0, vals, 0, childCount);
    } else { // set struct as a field of Row
        Row[] rows = (Row[]) vals;
        for (int i = 0; i < childCount; i++) {
            rows[i].setField(fieldIdx, structs[i]);
        }
    }
}
Example #10
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0

private ColumnVector getStructColumnVector(StructObjectInspector soi) {
    ArrayList<ColumnVector> vectors = new ArrayList<>();
    List<? extends StructField> members = soi.getAllStructFieldRefs();
    for (StructField structField : members) {
        vectors.add(getColumnVector(structField.getFieldObjectInspector()));
    }
    ColumnVector[] columnVectors = vectors.toArray(new ColumnVector[0]);
    return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, columnVectors);
}
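The recursion above generalizes to arbitrary nesting, since a StructColumnVector child can itself be a StructColumnVector. A hand-built sketch of what it would produce for a hypothetical struct&lt;a:bigint, b:struct&lt;c:bigint&gt;&gt; inspector:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

class NestedStructSketch {
    // Hand-built equivalent for struct<a:bigint, b:struct<c:bigint>>.
    static ColumnVector build() {
        int size = VectorizedRowBatch.DEFAULT_SIZE;
        LongColumnVector a = new LongColumnVector(size);
        LongColumnVector c = new LongColumnVector(size);
        StructColumnVector b = new StructColumnVector(size, c);
        return new StructColumnVector(size, a, b);
    }
}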
Example #11
Source File: OrcBatchReader.java From flink with Apache License 2.0

private static void readStructColumn(Object[] vals, int fieldIdx,
        StructColumnVector structVector, TypeDescription schema, int childCount) {

    List<TypeDescription> childrenTypes = schema.getChildren();
    int numFields = childrenTypes.size();

    // Early out if struct column is repeating and always null.
    // This is the only repeating case we need to handle.
    // ORC assumes that repeating values have been pushed to the children.
    if (structVector.isRepeating && structVector.isNull[0]) {
        if (fieldIdx < 0) {
            for (int i = 0; i < childCount; i++) {
                vals[i] = null;
            }
        } else {
            for (int i = 0; i < childCount; i++) {
                ((Row) vals[i]).setField(fieldIdx, null);
            }
        }
        return;
    }

    // create a batch of Rows to read the structs
    Row[] structs = new Row[childCount];
    // TODO: possible improvement: reuse existing Row objects
    for (int i = 0; i < childCount; i++) {
        structs[i] = new Row(numFields);
    }

    // read struct fields
    for (int i = 0; i < numFields; i++) {
        ColumnVector fieldVector = structVector.fields[i];
        if (!fieldVector.isRepeating) {
            // Reduce fieldVector reads by setting all entries null where struct is null.
            if (fieldVector.noNulls) {
                // fieldVector had no nulls. Just use struct null information.
                System.arraycopy(structVector.isNull, 0, fieldVector.isNull, 0,
                        structVector.isNull.length);
                structVector.fields[i].noNulls = false;
            } else {
                // fieldVector had nulls. Merge field nulls with struct nulls.
                for (int j = 0; j < structVector.isNull.length; j++) {
                    structVector.fields[i].isNull[j] =
                            structVector.isNull[j] || structVector.fields[i].isNull[j];
                }
            }
        }
        readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
    }

    boolean[] isNullVector = structVector.isNull;
    if (fieldIdx == -1) { // set struct as an object
        for (int i = 0; i < childCount; i++) {
            if (isNullVector[i]) {
                vals[i] = null;
            } else {
                vals[i] = structs[i];
            }
        }
    } else { // set struct as a field of Row
        Row[] rows = (Row[]) vals;
        for (int i = 0; i < childCount; i++) {
            if (isNullVector[i]) {
                rows[i].setField(fieldIdx, null);
            } else {
                rows[i].setField(fieldIdx, structs[i]);
            }
        }
    }
}
Example #12
Source File: JsonFieldFiller.java From secor with Apache License 2.0

static void setValue(JSONWriter writer, ColumnVector vector, TypeDescription schema,
                     int row) throws JSONException {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        switch (schema.getCategory()) {
        case BOOLEAN:
            writer.value(((LongColumnVector) vector).vector[row] != 0);
            break;
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            writer.value(((LongColumnVector) vector).vector[row]);
            break;
        case FLOAT:
        case DOUBLE:
            writer.value(((DoubleColumnVector) vector).vector[row]);
            break;
        case STRING:
        case CHAR:
        case VARCHAR:
            writer.value(((BytesColumnVector) vector).toString(row));
            break;
        case DECIMAL:
            writer.value(((DecimalColumnVector) vector).vector[row].toString());
            break;
        case DATE:
            writer.value(new DateWritable(
                    (int) ((LongColumnVector) vector).vector[row]).toString());
            break;
        case TIMESTAMP:
            writer.value(((TimestampColumnVector) vector).asScratchTimestamp(row).toString());
            break;
        case LIST:
            setList(writer, (ListColumnVector) vector, schema, row);
            break;
        case STRUCT:
            setStruct(writer, (StructColumnVector) vector, schema, row);
            break;
        case UNION:
            setUnion(writer, (UnionColumnVector) vector, schema, row);
            break;
        case BINARY:
            // To prevent similar mistakes like the one described in
            // https://github.com/pinterest/secor/pull/1018, it would be better to
            // explicitly throw an exception here rather than ignore the incoming
            // values, which causes silent failures in a later stage.
            throw new UnsupportedOperationException();
        case MAP:
            setMap(writer, (MapColumnVector) vector, schema, row);
            break;
        default:
            throw new IllegalArgumentException("Unknown type " + schema.toString());
        }
    } else {
        writer.value(null);
    }
}
Example #13
Source File: OrcBatchReader.java From Flink-CEPplus with Apache License 2.0

private static void readStructColumn(Object[] vals, int fieldIdx,
        StructColumnVector structVector, TypeDescription schema, int childCount) {

    List<TypeDescription> childrenTypes = schema.getChildren();
    int numFields = childrenTypes.size();

    // Early out if struct column is repeating and always null.
    // This is the only repeating case we need to handle.
    // ORC assumes that repeating values have been pushed to the children.
    if (structVector.isRepeating && structVector.isNull[0]) {
        if (fieldIdx < 0) {
            for (int i = 0; i < childCount; i++) {
                vals[i] = null;
            }
        } else {
            for (int i = 0; i < childCount; i++) {
                ((Row) vals[i]).setField(fieldIdx, null);
            }
        }
        return;
    }

    // create a batch of Rows to read the structs
    Row[] structs = new Row[childCount];
    // TODO: possible improvement: reuse existing Row objects
    for (int i = 0; i < childCount; i++) {
        structs[i] = new Row(numFields);
    }

    // read struct fields
    for (int i = 0; i < numFields; i++) {
        ColumnVector fieldVector = structVector.fields[i];
        if (!fieldVector.isRepeating) {
            // Reduce fieldVector reads by setting all entries null where struct is null.
            if (fieldVector.noNulls) {
                // fieldVector had no nulls. Just use struct null information.
                System.arraycopy(structVector.isNull, 0, fieldVector.isNull, 0,
                        structVector.isNull.length);
                structVector.fields[i].noNulls = false;
            } else {
                // fieldVector had nulls. Merge field nulls with struct nulls.
                for (int j = 0; j < structVector.isNull.length; j++) {
                    structVector.fields[i].isNull[j] =
                            structVector.isNull[j] || structVector.fields[i].isNull[j];
                }
            }
        }
        readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount);
    }

    boolean[] isNullVector = structVector.isNull;
    if (fieldIdx == -1) { // set struct as an object
        for (int i = 0; i < childCount; i++) {
            if (isNullVector[i]) {
                vals[i] = null;
            } else {
                vals[i] = structs[i];
            }
        }
    } else { // set struct as a field of Row
        Row[] rows = (Row[]) vals;
        for (int i = 0; i < childCount; i++) {
            if (isNullVector[i]) {
                rows[i].setField(fieldIdx, null);
            } else {
                rows[i].setField(fieldIdx, structs[i]);
            }
        }
    }
}