Java Code Examples for org.apache.orc.TypeDescription#getId()
The following examples show how to use
org.apache.orc.TypeDescription#getId().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: OrcRowInputFormat.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ private boolean[] computeProjectionMask() { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
Example 2
Source File: OrcRowInputFormat.java From flink with Apache License 2.0 | 5 votes |
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ private boolean[] computeProjectionMask() { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
Example 3
Source File: ORCSchemaUtil.java From iceberg with Apache License 2.0 | 5 votes |
private static Map<Integer, OrcField> icebergToOrcMapping(String name, TypeDescription orcType) { Map<Integer, OrcField> icebergToOrc = Maps.newHashMap(); switch (orcType.getCategory()) { case STRUCT: List<String> childrenNames = orcType.getFieldNames(); List<TypeDescription> children = orcType.getChildren(); for (int i = 0; i < children.size(); i++) { icebergToOrc.putAll(icebergToOrcMapping(childrenNames.get(i), children.get(i))); } break; case LIST: icebergToOrc.putAll(icebergToOrcMapping("element", orcType.getChildren().get(0))); break; case MAP: icebergToOrc.putAll(icebergToOrcMapping("key", orcType.getChildren().get(0))); icebergToOrc.putAll(icebergToOrcMapping("value", orcType.getChildren().get(1))); break; } if (orcType.getId() > 0) { // Only add to non-root types. icebergID(orcType) .ifPresent(integer -> icebergToOrc.put(integer, new OrcField(name, orcType))); } return icebergToOrc; }
Example 4
Source File: OrcNoHiveShim.java From flink with Apache License 2.0 | 5 votes |
@Override public RecordReader createRecordReader( Configuration conf, TypeDescription schema, int[] selectedFields, List<OrcSplitReader.Predicate> conjunctPredicates, org.apache.flink.core.fs.Path path, long splitStart, long splitLength) throws IOException { // open ORC file and create reader org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri()); Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf)); // get offset and length for the stripes that start in the split Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit( splitStart, splitLength, orcReader.getStripes()); // create ORC row reader configuration Reader.Options options = new Reader.Options() .schema(schema) .range(offsetAndLength.f0, offsetAndLength.f1) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); // TODO configure filters // configure selected fields options.include(computeProjectionMask(schema, selectedFields)); // create ORC row reader RecordReader orcRowsReader = orcReader.rows(options); // assign ids schema.getId(); return orcRowsReader; }
Example 5
Source File: OrcShimV200.java From flink with Apache License 2.0 | 5 votes |
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ public static boolean[] computeProjectionMask(TypeDescription schema, int[] selectedFields) { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
Example 6
Source File: OrcShimV200.java From flink with Apache License 2.0 | 4 votes |
@Override public RecordReader createRecordReader( Configuration conf, TypeDescription schema, int[] selectedFields, List<Predicate> conjunctPredicates, org.apache.flink.core.fs.Path path, long splitStart, long splitLength) throws IOException { // open ORC file and create reader Path hPath = new Path(path.toUri()); Reader orcReader = createReader(hPath, conf); // get offset and length for the stripes that start in the split Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit( splitStart, splitLength, orcReader.getStripes()); // create ORC row reader configuration Reader.Options options = readOrcConf( new Reader.Options().schema(schema).range(offsetAndLength.f0, offsetAndLength.f1), conf); // configure filters if (!conjunctPredicates.isEmpty()) { SearchArgument.Builder b = SearchArgumentFactory.newBuilder(); b = b.startAnd(); for (Predicate predicate : conjunctPredicates) { predicate.add(b); } b = b.end(); options.searchArgument(b.build(), new String[]{}); } // configure selected fields options.include(computeProjectionMask(schema, selectedFields)); // create ORC row reader RecordReader orcRowsReader = createRecordReader(orcReader, options); // assign ids schema.getId(); return orcRowsReader; }