Java Code Examples for org.apache.parquet.schema.Type#getLogicalTypeAnnotation()

The following examples show how to use org.apache.parquet.schema.Type#getLogicalTypeAnnotation(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
public ListConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
  LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
  if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) || parquetType.isPrimitive()) {
    throw new ParquetDecodingException("Expected LIST wrapper. Found: " + logicalTypeAnnotation + " instead.");
  }

  GroupType rootWrapperType = parquetType.asGroupType();
  if (!rootWrapperType.containsField("list") || rootWrapperType.getType("list").isPrimitive()) {
    throw new ParquetDecodingException("Expected repeated 'list' group inside LIST wrapperr but got: " + rootWrapperType);
  }

  GroupType listType = rootWrapperType.getType("list").asGroupType();
  if (!listType.containsField("element")) {
    throw new ParquetDecodingException("Expected 'element' inside repeated list group but got: " + listType);
  }

  Type elementType = listType.getType("element");
  converter = newMessageConverter(parentBuilder, fieldDescriptor, elementType);
}
 
Example 2
Source File: ProtoWriteSupport.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the group type to use for {@code type}, looking through LIST and MAP wrappers.
 *
 * <ul>
 *   <li>No logical annotation: {@code type} itself, as a group.</li>
 *   <li>LIST annotation: the group at {@code list.element}.</li>
 *   <li>MAP annotation: the group at {@code key_value.value}.</li>
 *   <li>Any other annotation: {@code type} itself, as a group.</li>
 * </ul>
 *
 * @param type the Parquet type to resolve
 * @return the resolved group type
 */
private GroupType getGroupType(Type type) {
  final LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
  if (annotation == null) {
    return type.asGroupType();
  }

  final LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<GroupType> unwrapper =
      new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<GroupType>() {
        @Override
        public Optional<GroupType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
          // LIST wrapper -> "list" group -> "element" group
          final GroupType repeatedList = type.asGroupType().getType("list").asGroupType();
          final GroupType element = repeatedList.getType("element").asGroupType();
          return ofNullable(element);
        }

        @Override
        public Optional<GroupType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
          // MAP wrapper -> "key_value" group -> "value" group
          final GroupType keyValue = type.asGroupType().getType("key_value").asGroupType();
          final GroupType value = keyValue.getType("value").asGroupType();
          return ofNullable(value);
        }
      };

  final Optional<GroupType> unwrapped = annotation.accept(unwrapper);
  return unwrapped.orElse(type.asGroupType());
}
 
Example 3
Source File: SimpleRecordConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter for one field of the record schema.
 *
 * <p>Primitive fields get a {@code StringConverter} or {@code DecimalConverter} when they
 * carry the matching logical annotation, and a {@code SimplePrimitiveConverter} otherwise.
 * Group fields get a {@code SimpleMapRecordConverter} or {@code SimpleListRecordConverter}
 * for MAP / LIST annotations, and a nested {@code SimpleRecordConverter} otherwise.
 *
 * <p>The fallback converter is built lazily via {@code orElseGet}, so it is only
 * constructed when no annotation-specific converter was selected.
 *
 * @param field the Parquet field to build a converter for
 * @return the converter for {@code field}
 */
private Converter createConverter(Type field) {
  final Optional<LogicalTypeAnnotation> ltype = Optional.ofNullable(field.getLogicalTypeAnnotation());

  if (field.isPrimitive()) {
    return ltype
        .flatMap(annotation -> annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
          @Override
          public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
            return of(new StringConverter(field.getName()));
          }

          @Override
          public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
            int scale = decimalLogicalType.getScale();
            return of(new DecimalConverter(field.getName(), scale));
          }
        }))
        // Built only when there is no annotation or the visitor did not handle it.
        .orElseGet(() -> new SimplePrimitiveConverter(field.getName()));
  }

  final GroupType groupType = field.asGroupType();
  return ltype
      .flatMap(annotation -> annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
          return of(new SimpleMapRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
        }

        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
          return of(new SimpleListRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
        }
      }))
      // Avoids constructing a nested record converter that would be discarded
      // whenever a MAP or LIST converter was selected above.
      .orElseGet(() -> new SimpleRecordConverter(groupType, field.getName(), this));
}
 
Example 4
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter for a protobuf field backed by {@code parquetType}.
 *
 * <p>Converted values are delivered to {@code parentBuilder}: through
 * {@code addRepeatedField} when the descriptor is repeated, through {@code setField}
 * otherwise. LIST- and MAP-annotated types get a {@code ListConverter} /
 * {@code MapConverter}; everything else (including types without any logical
 * annotation) goes through {@code newScalarConverter}.
 *
 * @param parentBuilder   builder that receives the converted values
 * @param fieldDescriptor descriptor of the field being populated
 * @param parquetType     Parquet type of the field
 * @return the converter for the field
 */
private Converter newMessageConverter(final Message.Builder parentBuilder, final Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
  final ParentValueContainer parent;
  if (!fieldDescriptor.isRepeated()) {
    // Singular field: each value replaces the field's content.
    parent = new ParentValueContainer() {
      @Override
      public void add(Object value) {
        parentBuilder.setField(fieldDescriptor, value);
      }
    };
  } else {
    // Repeated field: each value is appended.
    parent = new ParentValueContainer() {
      @Override
      public void add(Object value) {
        parentBuilder.addRepeatedField(fieldDescriptor, value);
      }
    };
  }

  final LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter> wrapperVisitor =
      new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
          return of(new ListConverter(parentBuilder, fieldDescriptor, parquetType));
        }

        @Override
        public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
          return of(new MapConverter(parentBuilder, fieldDescriptor, parquetType));
        }
      };

  final LogicalTypeAnnotation annotation = parquetType.getLogicalTypeAnnotation();
  final Optional<Converter> wrapperConverter =
      annotation == null ? Optional.<Converter>empty() : annotation.accept(wrapperVisitor);

  return wrapperConverter.orElseGet(
      () -> newScalarConverter(parent, parentBuilder, fieldDescriptor, parquetType));
}
 
Example 5
Source File: ProtoMessageConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
public MapConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
  LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
  if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation)) {
    throw new ParquetDecodingException("Expected MAP wrapper. Found: " + logicalTypeAnnotation + " instead.");
  }

  Type parquetSchema;
  if (parquetType.asGroupType().containsField("key_value")){
    parquetSchema = parquetType.asGroupType().getType("key_value");
  } else {
    throw new ParquetDecodingException("Expected map but got: " + parquetType);
  }

  converter = newMessageConverter(parentBuilder, fieldDescriptor, parquetSchema);
}
 
Example 6
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the converter for a Pig bag backed by a single-field Parquet group.
 *
 * <p>When the nested Parquet type is primitive, or is annotated as MAP or LIST, each
 * converted value is wrapped in a new tuple before being buffered, and the child
 * converter is built against the field one level deeper in the Pig schema. Otherwise
 * the converted value is buffered as-is (cast to {@code Tuple}).
 *
 * @param parquetSchema        Parquet group for the bag; must have exactly one field
 * @param pigSchema            Pig schema of the bag
 * @param parent               container stored on this converter
 * @param numbersDefaultToZero forwarded to {@code newConverter}
 * @param columnIndexAccess    forwarded to {@code newConverter}
 * @throws IllegalArgumentException if {@code parquetSchema} does not have exactly one field
 * @throws FrontendException        declared by the Pig schema field lookups
 */
BagConverter(GroupType parquetSchema, FieldSchema pigSchema, ParentValueContainer parent, boolean numbersDefaultToZero, boolean columnIndexAccess) throws FrontendException {
  this.parent = parent;

  final int fieldCount = parquetSchema.getFieldCount();
  if (fieldCount != 1) {
    throw new IllegalArgumentException("bags have only one field. " + parquetSchema + " size = " + fieldCount);
  }

  final Type nestedType = parquetSchema.getType(0);
  final LogicalTypeAnnotation nestedAnnotation = nestedType.getLogicalTypeAnnotation();
  final boolean needsTupleWrapper = nestedType.isPrimitive()
      || nestedAnnotation instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
      || nestedAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation;

  final FieldSchema bagContent = pigSchema.schema.getField(0);
  final ParentValueContainer childsParent;
  final FieldSchema pigField;
  if (!needsTupleWrapper) {
    // The nested value is already a tuple: buffer it directly.
    childsParent = new ParentValueContainer() {
      @Override
      void add(Object value) {
        buffer.add((Tuple) value);
      }
    };
    pigField = bagContent;
  } else {
    // Pig bags always contain tuples, so a bare value has to be
    // wrapped in an extra tuple before it goes into the buffer.
    childsParent = new ParentValueContainer() {
      @Override
      void add(Object value) {
        buffer.add(TF.newTuple(value));
      }
    };
    pigField = bagContent.schema.getField(0);
  }

  child = newConverter(pigField, nestedType, childsParent, numbersDefaultToZero, columnIndexAccess);
}
 
Example 7
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Filters the Parquet schema of a bag against the requested Pig schema.
 *
 * <p>The bag group must contain exactly one field. When that field is primitive, or is
 * annotated as MAP or LIST, the Pig schema is descended one extra level before
 * filtering, skipping the tuple that wraps such values on the Pig side.
 *
 * @param bagType        Parquet group representing the bag
 * @param bagFieldSchema Pig field schema of the bag
 * @return {@code bagType} with its single field replaced by the filtered nested type
 * @throws RuntimeException  if {@code bagType} does not have exactly one field
 * @throws FrontendException declared by the Pig schema field lookups
 */
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema);
  }
  if (bagType.getFieldCount() != 1) {
    throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType);
  }

  final Type nested = bagType.getType(0);
  final LogicalTypeAnnotation nestedAnnotation = nested.getLogicalTypeAnnotation();
  final boolean wrappedInExtraTuple = nested.isPrimitive()
      || nestedAnnotation instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
      || nestedAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation;

  final FieldSchema bagContent = bagFieldSchema.schema.getField(0);
  // Bags always contain tuples => skip the extra tuple that was inserted in that case.
  final FieldSchema innerField = wrappedInExtraTuple ? bagContent.schema.getField(0) : bagContent;

  return bagType.withNewFields(filter(nested, innerField));
}
 
Example 8
Source File: TupleConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter matching a Pig field's data type.
 *
 * <p>BAG, MAP and TUPLE fields get nested converters built from {@code type} as a group;
 * the TUPLE converter hands its finished tuple to {@code parent} when the group ends.
 * Scalar Pig types map to the corresponding {@code Field*Converter}. For BOOLEAN, an
 * integer converter is used instead when {@code elephantBirdCompatible} is set.
 *
 * @param pigField               Pig field schema whose {@code type} selects the converter
 * @param type                   Parquet type of the field
 * @param parent                 container that receives the converted values
 * @param elephantBirdCompatible selects the integer converter for BOOLEAN; also forwarded
 *                               to the nested BAG / MAP / TUPLE converters
 * @param columnIndexAccess      forwarded to the nested BAG / MAP / TUPLE converters
 * @return the converter for the field
 * @throws TupleConversionException for an unsupported Pig type, or wrapping any
 *         {@code FrontendException} / {@code RuntimeException} raised while building
 */
static Converter newConverter(FieldSchema pigField, Type type, final ParentValueContainer parent, boolean elephantBirdCompatible, boolean columnIndexAccess) {
  try {
    switch (pigField.type) {
    // ---- nested types ----
    case DataType.TUPLE:
      return new TupleConverter(type.asGroupType(), pigField.schema, elephantBirdCompatible, columnIndexAccess) {
        @Override
        public void end() {
          super.end();
          parent.add(this.currentTuple);
        }
      };
    case DataType.BAG:
      return new BagConverter(type.asGroupType(), pigField, parent, elephantBirdCompatible, columnIndexAccess);
    case DataType.MAP:
      return new MapConverter(type.asGroupType(), pigField, parent, elephantBirdCompatible, columnIndexAccess);

    // ---- text and binary ----
    case DataType.CHARARRAY: {
      // If the original type isn't a string, we don't want to use the dictionary because
      // a custom implementation would be needed for each type. Just default to no dictionary.
      final boolean annotatedAsString =
          type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation;
      return new FieldStringConverter(parent, annotatedAsString);
    }
    case DataType.BYTEARRAY:
      return new FieldByteArrayConverter(parent);

    // ---- numbers and booleans ----
    case DataType.BOOLEAN:
      return elephantBirdCompatible
          ? new FieldIntegerConverter(parent)
          : new FieldBooleanConverter(parent);
    case DataType.INTEGER:
      return new FieldIntegerConverter(parent);
    case DataType.LONG:
      return new FieldLongConverter(parent);
    case DataType.FLOAT:
      return new FieldFloatConverter(parent);
    case DataType.DOUBLE:
      return new FieldDoubleConverter(parent);
    case DataType.BIGDECIMAL:
      return new FieldBigDecimalConverter(type, parent);

    default:
      throw new TupleConversionException("unsupported pig type: " + pigField);
    }
  } catch (FrontendException | RuntimeException e) {
    final String context = "error while preparing converter for:\n" + pigField + "\n" + type;
    throw new TupleConversionException(context, e);
  }
}