org.apache.parquet.io.InvalidRecordException Java Examples
The following examples show how to use
org.apache.parquet.io.InvalidRecordException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetFileReaderTest.java From kafka-connect-fs with Apache License 2.0 | 6 votes |
/**
 * Configures the Parquet reader with the {@code test_projection} schema built below
 * (a single string field named {@code field1}) and checks that reading all data fails
 * with a {@link ConnectException} whose cause is an {@link InvalidRecordException}.
 *
 * <p>The test fails if reading completes without throwing; otherwise the assertions
 * in the {@code catch} block would never run and the test would pass vacuously.
 *
 * @param fsConfig test configuration supplied by {@code fileSystemConfigProvider}
 * @throws IOException declared by the signature for the file-system setup calls
 */
@ParameterizedTest
@MethodSource("fileSystemConfigProvider")
public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws IOException {
    Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro")
            .fields()
            .name("field1").type("string").noDefault()
            .endRecord();
    Map<String, Object> readerConfig = getReaderConfig();
    readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, testSchema.toString());
    readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension());
    FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration());
    fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig));
    try {
        readAllData(fsConfig);
    } catch (Exception e) {
        assertEquals(ConnectException.class, e.getClass());
        assertEquals(InvalidRecordException.class, e.getCause().getClass());
        return;
    }
    // AssertionError is an Error, not an Exception, so it is thrown outside the
    // try/catch above and cannot be swallowed by it.
    throw new AssertionError(
            "Expected a ConnectException caused by InvalidRecordException, but reading succeeded");
}
Example #2
Source File: AvroRecordConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Finds the Avro field that corresponds to a Parquet field name.
 *
 * <p>An exact name match on {@code avroSchema} is preferred; otherwise the first
 * field whose aliases contain the name is returned.
 *
 * @param parquetFieldName name of the field as it appears in the Parquet schema
 * @return the matching Avro field
 * @throws InvalidRecordException if no field matches by name or alias
 */
private Schema.Field getAvroField(String parquetFieldName) {
  final Schema.Field direct = avroSchema.getField(parquetFieldName);
  if (direct == null) {
    // No field carries this exact name: fall back to an alias search.
    for (Schema.Field candidate : avroSchema.getFields()) {
      if (candidate.aliases().contains(parquetFieldName)) {
        return candidate;
      }
    }
    final String message = String.format(
        "Parquet/Avro schema mismatch: Avro field '%s' not found",
        parquetFieldName);
    throw new InvalidRecordException(message);
  }
  return direct;
}
Example #3
Source File: ParquetRowiseReader.java From dremio-oss with Apache License 2.0 | 5 votes |
private void verifyDecimalTypesAreSame(OutputMutator output, ParquetColumnResolver columnResolver) { for (ValueVector vector : output.getVectors()) { Field fieldInSchema = vector.getField(); if (fieldInSchema.getType().getTypeID() == ArrowType.ArrowTypeID.Decimal) { ArrowType.Decimal typeInTable = (ArrowType.Decimal) fieldInSchema.getType(); Type typeInParquet = null; // the field in arrow schema may not be present in hive schema try { typeInParquet = schema.getType(columnResolver.getParquetColumnName(fieldInSchema.getName())); } catch (InvalidRecordException e) { } if (typeInParquet == null) { continue; } boolean schemaMisMatch = true; OriginalType originalType = typeInParquet.getOriginalType(); if (originalType.equals(OriginalType.DECIMAL) ) { int precision = typeInParquet .asPrimitiveType().getDecimalMetadata().getPrecision(); int scale = typeInParquet.asPrimitiveType().getDecimalMetadata().getScale(); ArrowType decimalType = new ArrowType.Decimal(precision, scale); if (decimalType.equals(typeInTable)) { schemaMisMatch = false; } } if (schemaMisMatch) { throw UserException.schemaChangeError().message("Mixed types "+ fieldInSchema.getType() + " , " + typeInParquet + " is not supported.") .build(logger); } } } }
Example #4
Source File: MessageType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code subType} is a message type contained in this one.
 *
 * @param subType the type expected to be contained in this message type
 * @throws InvalidRecordException if {@code subType} is not a {@code MessageType}
 */
@Override
public void checkContains(Type subType) {
  if (subType instanceof MessageType) {
    // Field-by-field containment is delegated to the group-level check.
    checkGroupContains(subType);
    return;
  }
  throw new InvalidRecordException(subType + " found: expected " + this);
}
Example #5
Source File: GroupType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Looks up the position of a field by name in {@code indexByName}.
 *
 * @param name string name of a field
 * @return the index of the field with that name
 * @throws InvalidRecordException if no field with that name is registered
 */
public int getFieldIndex(String name) {
  final Integer position = indexByName.get(name);
  if (position != null) {
    return position;
  }
  throw new InvalidRecordException(name + " not found in " + this);
}
Example #6
Source File: GroupType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that every field of the group {@code subType} is contained in the
 * same-named field of this group.
 *
 * @param subType the group type whose fields are checked against this group
 * @throws InvalidRecordException if {@code subType} is a primitive type
 */
void checkGroupContains(Type subType) {
  if (subType.isPrimitive()) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
  for (Type candidate : subType.asGroupType().getFields()) {
    // Resolve the same-named field here and let it validate its counterpart.
    this.getType(candidate.getName()).checkContains(candidate);
  }
}
Example #7
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Returns the repetition level contributed by this primitive node.
 *
 * @param path the column path being resolved
 * @param i    the current position in {@code path}; must equal {@code path.length} here
 * @return 1 if this type is {@code REPEATED}, otherwise 0
 * @throws InvalidRecordException if the path is not fully consumed at this node
 */
@Override
public int getMaxRepetitionLevel(String[] path, int i) {
  if (i == path.length) {
    if (isRepetition(Repetition.REPEATED)) {
      return 1;
    }
    return 0;
  }
  throw new InvalidRecordException("Arrived at primitive node, path invalid");
}
Example #8
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Returns the definition level contributed by this primitive node.
 *
 * @param path the column path being resolved
 * @param i    the current position in {@code path}; must equal {@code path.length} here
 * @return 0 if this type is {@code REQUIRED}, otherwise 1
 * @throws InvalidRecordException if the path is not fully consumed at this node
 */
@Override
public int getMaxDefinitionLevel(String[] path, int i) {
  if (i == path.length) {
    if (isRepetition(Repetition.REQUIRED)) {
      return 0;
    }
    return 1;
  }
  throw new InvalidRecordException("Arrived at primitive node, path invalid");
}
Example #9
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Resolves the type at the end of a column path; at a primitive node that is this type.
 *
 * @param path the column path being resolved
 * @param i    the current position in {@code path}; must equal {@code path.length} here
 * @return this type
 * @throws InvalidRecordException if the path is not fully consumed at this node
 */
@Override
public Type getType(String[] path, int i) {
  if (path.length == i) {
    return this;
  }
  throw new InvalidRecordException(
      "Arrived at primitive node at index " + i + " , path invalid: " + Arrays.toString(path));
}
Example #10
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code subType} is a primitive type with the same {@code primitive}
 * value as this one, after running the superclass check.
 *
 * @param subType the type expected to be contained in this primitive type
 * @throws InvalidRecordException if {@code subType} is not primitive or its
 *                                {@code primitive} differs from this type's
 */
@Override
void checkContains(Type subType) {
  super.checkContains(subType);
  // Short-circuit keeps asPrimitiveType() from being called on a non-primitive type.
  final boolean compatible =
      subType.isPrimitive() && this.primitive == subType.asPrimitiveType().primitive;
  if (!compatible) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
}
Example #11
Source File: ProtoMessageConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Translates the given parquet enum value to the protocol buffer enum value
 * registered for it in {@code enumLookup}.
 *
 * @param binaryValue the parquet enum value to translate
 * @return the corresponding protocol buffer enum value
 * @throws org.apache.parquet.io.InvalidRecordException if there is no corresponding value
 */
private Descriptors.EnumValueDescriptor translateEnumValue(Binary binaryValue) {
  final Descriptors.EnumValueDescriptor match = enumLookup.get(binaryValue);
  if (match != null) {
    return match;
  }
  // Unknown value: report it together with every value the lookup knows about.
  throw new InvalidRecordException(
      "Illegal enum value \"" + binaryValue + "\""
          + " in protocol buffer \"" + fieldType.getFullName() + "\""
          + " legal values are: \"" + enumLookup.keySet() + "\"");
}
Example #12
Source File: AvroIndexedRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Finds the Avro field that corresponds to a Parquet field name.
 *
 * <p>An exact name match on {@code avroSchema} takes precedence. Aliases are consulted
 * only when no field has that exact name, so an alias declared on another field can
 * never shadow a direct match and the alias scan is skipped when it is not needed.
 *
 * @param parquetFieldName name of the field as it appears in the Parquet schema
 * @return the matching Avro field
 * @throws InvalidRecordException if no field matches by name or alias
 */
private Schema.Field getAvroField(String parquetFieldName) {
  Schema.Field avroField = avroSchema.getField(parquetFieldName);
  if (avroField != null) {
    return avroField;
  }
  for (Schema.Field f : avroSchema.getFields()) {
    if (f.aliases().contains(parquetFieldName)) {
      return f;
    }
  }
  throw new InvalidRecordException(String.format(
      "Parquet/Avro schema mismatch. Avro field '%s' not found.",
      parquetFieldName));
}
Example #13
Source File: ParquetFilePOJOReader.java From attic-apex-malhar with Apache License 2.0 | 4 votes |
/**
 * Converts a Parquet <b>Group</b>(parquet.example.data.Group) to a POJO.
 * Supported parquet primitive types are BOOLEAN, INT32, INT64, FLOAT, DOUBLE
 * and BINARY.
 *
 * <p>A new instance of {@code pojoClass} is created and, for each entry of
 * {@code activeFieldInfos}, the first value of the field is read as a string, parsed to
 * the entry's primitive type and passed to the entry's setter. A field that raises
 * {@link InvalidRecordException} is logged and skipped; the remaining fields are still set.
 *
 * @param group the Parquet group to convert
 * @return the populated POJO
 * @throws ParquetEncodingException
 *           if group contains unsupported type
 * @throws RuntimeException
 *           if {@code pojoClass} cannot be instantiated
 */
@Override
protected Object convertGroup(Group group)
{
  Object obj;
  try {
    obj = pojoClass.newInstance();
  } catch (InstantiationException | IllegalAccessException ex) {
    throw new RuntimeException(ex);
  }

  for (int i = 0; i < activeFieldInfos.size(); i++) {
    try {
      ParquetFilePOJOReader.ActiveFieldInfo afi = activeFieldInfos.get(i);

      // Parsed values are kept as primitives: boxing them first only to have them
      // converted again at the setter call is unnecessary work.
      switch (afi.primitiveTypeName) {
        case BOOLEAN:
          boolean booleanVal = Boolean.parseBoolean(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterBoolean<Object>)afi.setter).set(obj, booleanVal);
          break;

        case INT32:
          int intVal = Integer.parseInt(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterInt<Object>)afi.setter).set(obj, intVal);
          break;

        case INT64:
          long longVal = Long.parseLong(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterLong<Object>)afi.setter).set(obj, longVal);
          break;

        case FLOAT:
          float floatVal = Float.parseFloat(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterFloat<Object>)afi.setter).set(obj, floatVal);
          break;

        case DOUBLE:
          double doubleVal = Double.parseDouble(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterDouble<Object>)afi.setter).set(obj, doubleVal);
          break;

        case BINARY:
          ((PojoUtils.Setter<Object, String>)afi.setter).set(obj, group.getValueToString(afi.fieldIndex, 0));
          break;

        default:
          throw new ParquetEncodingException("Unsupported column type: " + afi.primitiveTypeName);
      }
    } catch (InvalidRecordException e) {
      // The exception is passed as the throwable argument, so the message must not
      // contain a placeholder: it would never be substituted.
      logger.error("Field not found in schema", e);
    }
  }
  return obj;
}
Example #14
Source File: ParquetTableSource.java From flink with Apache License 2.0 | 4 votes |
@Nullable private Tuple2<Column, Comparable> extractColumnAndLiteral(BinaryComparison comp) { String columnName = getColumnName(comp); ColumnPath columnPath = ColumnPath.fromDotString(columnName); TypeInformation<?> typeInfo = null; try { Type type = parquetSchema.getType(columnPath.toArray()); typeInfo = ParquetSchemaConverter.convertParquetTypeToTypeInfo(type); } catch (InvalidRecordException e) { LOG.error("Pushed predicate on undefined field name {} in schema", columnName); return null; } // fetch literal and ensure it is comparable Object value = getLiteral(comp); // validate that literal is comparable if (!(value instanceof Comparable)) { LOG.warn("Encountered a non-comparable literal of type {}." + "Cannot push predicate [{}] into ParquetTablesource." + "This is a bug and should be reported.", value.getClass().getCanonicalName(), comp); return null; } if (typeInfo == BasicTypeInfo.BYTE_TYPE_INFO || typeInfo == BasicTypeInfo.SHORT_TYPE_INFO || typeInfo == BasicTypeInfo.INT_TYPE_INFO) { return new Tuple2<>(FilterApi.intColumn(columnName), ((Number) value).intValue()); } else if (typeInfo == BasicTypeInfo.LONG_TYPE_INFO) { return new Tuple2<>(FilterApi.longColumn(columnName), ((Number) value).longValue()); } else if (typeInfo == BasicTypeInfo.FLOAT_TYPE_INFO) { return new Tuple2<>(FilterApi.floatColumn(columnName), ((Number) value).floatValue()); } else if (typeInfo == BasicTypeInfo.BOOLEAN_TYPE_INFO) { return new Tuple2<>(FilterApi.booleanColumn(columnName), (Boolean) value); } else if (typeInfo == BasicTypeInfo.DOUBLE_TYPE_INFO) { return new Tuple2<>(FilterApi.doubleColumn(columnName), ((Number) value).doubleValue()); } else if (typeInfo == BasicTypeInfo.STRING_TYPE_INFO) { return new Tuple2<>(FilterApi.binaryColumn(columnName), Binary.fromString((String) value)); } else { // unsupported type return null; } }
Example #15
Source File: Type.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Checks that {@code subType} has the same name and repetition as this type.
 *
 * @param subType the type expected to be contained in this type
 * @throws InvalidRecordException if the name or the repetition differs
 */
void checkContains(Type subType) {
  final boolean sameName = this.name.equals(subType.name);
  final boolean sameRepetition = this.repetition == subType.repetition;
  if (sameName && sameRepetition) {
    return;
  }
  throw new InvalidRecordException(subType + " found: expected " + this);
}
Example #16
Source File: ProtoWriteSupport.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Rejects a field descriptor for which no writer exists. This method never returns
 * normally; the return type only lets callers use it where a {@code FieldWriter} is
 * expected.
 *
 * @param fieldDescriptor descriptor of the field that cannot be handled
 * @return never returns
 * @throws InvalidRecordException always, naming the descriptor and its Java type
 */
private FieldWriter unknownType(FieldDescriptor fieldDescriptor) {
  throw new InvalidRecordException(
      "Unknown type with descriptor \"" + fieldDescriptor
          + "\" and type \"" + fieldDescriptor.getJavaType() + "\".");
}