Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#getType()

The following examples show how to use org.apache.pig.ResourceSchema.ResourceFieldSchema#getType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Utf8StorageConverter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one complex-typed value from the stream, choosing the parser
 * from the Pig type declared in the field schema.
 *
 * @param in stream passed through to the type-specific consumer
 * @param complexFieldSchema schema whose type selects bag, tuple or map parsing
 * @return the value produced by the selected consumer
 * @throws IOException if the declared type is not BAG, TUPLE or MAP
 */
private Object consumeComplexType(PushbackInputStream in, ResourceFieldSchema complexFieldSchema) throws IOException {
    final byte declaredType = complexFieldSchema.getType();
    switch (declaredType) {
    case DataType.BAG:
        return consumeBag(in, complexFieldSchema);
    case DataType.TUPLE:
        return consumeTuple(in, complexFieldSchema);
    case DataType.MAP:
        return consumeMap(in, complexFieldSchema);
    default:
        throw new IOException("Unknown complex data type");
    }
}
 
Example 2
Source File: OrcStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the names of the fields, from the schema at the given location,
 * whose type is one of the scalar types accepted below.
 *
 * @param location location handed to {@code getSchema}
 * @param job job handed to {@code getSchema}
 * @return names of the accepted fields, in schema order
 * @throws IOException declared by the signature; presumably raised by the schema lookup
 */
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
    final ResourceSchema resolved = getSchema(location, job);
    final List<String> names = new ArrayList<String>();
    for (ResourceFieldSchema column : resolved.getFields()) {
        final boolean accepted;
        switch (column.getType()) {
        case DataType.BOOLEAN:
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.DATETIME:
        case DataType.CHARARRAY:
        case DataType.BIGINTEGER:
        case DataType.BIGDECIMAL:
            accepted = true;
            break;
        default:
            // Skip DataType.BYTEARRAY, DataType.TUPLE, DataType.MAP and DataType.BAG
            accepted = false;
            break;
        }
        if (accepted) {
            names.add(column.getName());
        }
    }
    return names;
}
 
Example 3
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Avro schema into a Pig resource schema.
 *
 * A schema that converts to a tuple yields that tuple's inner schema;
 * any other type is wrapped in a tuple that becomes the only field of
 * the returned schema.
 *
 * @param schema Avro schema to convert
 * @return the resulting Pig schema
 * @throws IOException if the schema contains a generic union
 */
public static ResourceSchema convert(Schema schema) throws IOException {
    if (AvroStorageUtils.containsGenericUnion(schema)) {
        throw new IOException ("We don't accept schema containing generic unions.");
    }

    Set<Schema> seenRecords = new HashSet<Schema>();
    ResourceFieldSchema converted = inconvert(schema, FIELD, seenRecords);

    // A tuple already carries the row-level schema; return it directly.
    if (converted.getType() == DataType.TUPLE) {
        return converted.getSchema();
    }

    // Other types: wrap in a tuple and expose it as the single field.
    ResourceFieldSchema wrapper = AvroStorageUtils.wrapAsTuple(converted);
    ResourceSchema wrapperSchema = new ResourceSchema();
    wrapperSchema.setFields(new ResourceFieldSchema[] { wrapper });
    return wrapperSchema;
}
 
Example 4
Source File: AvroSchema2Pig.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Avro schema into a Pig resource schema.
 *
 * @param schema Avro schema to convert
 * @return the inner schema when the conversion yields a tuple; otherwise a
 *         schema whose single field is the converted type wrapped in a tuple
 * @throws IOException if the schema contains a generic union
 */
public static ResourceSchema convert(Schema schema) throws IOException
{
    if (AvroStorageUtils.containsGenericUnion(schema))
    {
        throw new IOException("We don't accept schema containing generic unions.");
    }

    final Set<Schema> seenRecords = new HashSet<Schema>();
    final ResourceFieldSchema converted = inconvert(schema, FIELD, seenRecords);

    final boolean needsWrapper = converted.getType() != DataType.TUPLE;
    if (needsWrapper)
    {
        // Non-tuple types are wrapped so the result is always tuple-shaped.
        final ResourceFieldSchema wrapper = AvroStorageUtils.wrapAsTuple(converted);
        final ResourceSchema wrapperSchema = new ResourceSchema();
        wrapperSchema.setFields(new ResourceFieldSchema[] { wrapper });
        return wrapperSchema;
    }
    return converted.getSchema();
}
 
Example 5
Source File: AvroSchema2Pig.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Installs {@code subFieldSchema} as the element schema of a bag field.
 * A sub-field that is not already a tuple is wrapped in one first.
 *
 * @param fieldSchema bag field whose schema is overwritten
 * @param subFieldSchema element schema to place inside the bag
 * @throws IOException declared by the signature; presumably raised by the schema helpers
 */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                    ResourceFieldSchema subFieldSchema) throws IOException
{
    ResourceFieldSchema element;
    if (subFieldSchema.getType() == DataType.TUPLE)
    {
        element = subFieldSchema;
    }
    else
    {
        element = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] { element });
    fieldSchema.setSchema(bagContents);
}
 
Example 6
Source File: AvroStorageUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the given field schema is just a wrapped tuple, i.e. a
 * TUPLE-typed field whose name equals
 * {@link AvroStorageUtils#PIG_TUPLE_WRAPPER}.
 *
 * @param pigSchema field schema to inspect
 * @return {@code true} only for a tuple carrying the wrapper name;
 *         {@code false} for any other type or an unnamed field
 */
public static boolean isTupleWrapper(ResourceFieldSchema pigSchema) {
    if (pigSchema.getType() != DataType.TUPLE) {
        return false;
    }
    // The name may be null for anonymous fields, so guard before comparing.
    final String name = pigSchema.getName();
    return name != null && name.equals(AvroStorageUtils.PIG_TUPLE_WRAPPER);
}
 
Example 7
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Retypes every NULL-typed field of the schema as BYTEARRAY, in place.
 *
 * @param s schema to adjust
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s){
  final ResourceFieldSchema[] fields = s.getFields();
  for (int i = 0; i < fields.length; i++) {
    final ResourceFieldSchema field = fields[i];
    if (field.getType() == DataType.NULL) {
      field.setType(DataType.BYTEARRAY);
    }
  }
  return s;
}
 
Example 8
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Pig {@code Schema} from a {@code ResourceSchema}, converting
 * nested schemas recursively.
 *
 * A BAG field that carries a schema must contain exactly one field and
 * that field must be a TUPLE; a bag without a schema is accepted.
 *
 * @param rSchema schema to convert; may be {@code null}
 * @return the converted schema, or {@code null} when {@code rSchema} is {@code null}
 * @throws FrontendException declared by the signature
 */
public static Schema getPigSchema(ResourceSchema rSchema) 
throws FrontendException {
    if (rSchema == null) {
        return null;
    }

    List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (ResourceFieldSchema source : rSchema.getFields()) {
        String name = source.getName();
        Schema nested = (source.getSchema() == null) ? null : getPigSchema(source.getSchema());
        FieldSchema target = new FieldSchema(name, nested, source.getType());

        // allow partial schema: only validate a bag that declares its contents
        if (source.getType() == DataType.BAG && target.schema != null) {
            boolean singleTuple = target.schema.size() == 1
                    && target.schema.getField(0).type == DataType.TUPLE;
            if (!singleTuple) {
                ResourceFieldSchema.throwInvalidSchemaException();
            }
        }
        converted.add(target);
    }
    return new Schema(converted);
}
 
Example 9
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the element schema of a bag field. The sub-field is used as-is
 * when it is a tuple, and wrapped in a tuple otherwise.
 *
 * @param fieldSchema bag field that receives the new schema
 * @param subFieldSchema schema of the bag's elements
 * @throws IOException declared by the signature; presumably raised by the schema helpers
 */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                ResourceFieldSchema subFieldSchema)
                                throws IOException {
    ResourceFieldSchema tupleElement = subFieldSchema;
    if (subFieldSchema.getType() != DataType.TUPLE) {
        tupleElement = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceSchema elementSchema = new ResourceSchema();
    elementSchema.setFields(new ResourceFieldSchema[] { tupleElement });
    fieldSchema.setSchema(elementSchema);
}
 
Example 10
Source File: PigValueWriter.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the given tuple field is a schema-less Map that holds values.
 *
 * @param schema schema of the field being inspected
 * @param field index of the field inside the tuple
 * @param object tuple that holds the field value
 * @return true if the field is a Map without a schema that has at least one
 *         entry (mixed schema map); false if it is not a Map, has a schema,
 *         or is null or empty
 */
private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) {
    if (schema.getType() == DataType.MAP) {
        try {
            Map<?, ?> candidate = (Map<?, ?>) object.get(field);
            boolean schemaless = schema.getSchema() == null;
            return schemaless && candidate != null && !candidate.isEmpty();
        } catch (ExecException e) {
            throw new EsHadoopIllegalStateException(e);
        }
    }
    // Can't be a mixed value map if it's not a map at all.
    return false;
}
 
Example 11
Source File: AvroStorageUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the given field schema is just a wrapped tuple: its type
 * is TUPLE and its name equals {@link AvroStorageUtils#PIG_TUPLE_WRAPPER}.
 *
 * @param pigSchema field schema to inspect
 * @return {@code true} when the field is the tuple wrapper; {@code false}
 *         for non-tuples and for tuples with no name or a different name
 */
public static boolean isTupleWrapper(ResourceFieldSchema pigSchema) {
    final boolean isTuple = pigSchema.getType() == DataType.TUPLE;
    // Short-circuiting keeps the name lookup and comparison off the non-tuple path.
    return isTuple
            && pigSchema.getName() != null
            && pigSchema.getName().equals(AvroStorageUtils.PIG_TUPLE_WRAPPER);
}
 
Example 12
Source File: PigFieldExtractor.java    From elasticsearch-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the configured field names one after another, starting from
 * {@code target}. Each name is looked up among the fields of the current
 * PigTuple: an atomic field replaces the target with its string form, and
 * a tuple field replaces it with a new PigTuple for the next name.
 * A name that matches no field leaves the current target unchanged.
 *
 * @param target object to start the extraction from
 * @return the extracted value, or {@code NOT_FOUND} when a name has to be
 *         resolved against something that is not a PigTuple
 */
@Override
protected Object extractField(Object target) {
    for (String fieldName : getFieldNames()) {
        if (!(target instanceof PigTuple)) {
            return NOT_FOUND;
        }

        PigTuple current = (PigTuple) target;
        ResourceFieldSchema[] candidates = current.getSchema().getSchema().getFields();

        for (int pos = 0; pos < candidates.length; pos++) {
            ResourceFieldSchema candidate = candidates[pos];
            if (!fieldName.equals(candidate.getName())) {
                continue;
            }

            byte type = candidate.getType();
            try {
                Object value = current.getTuple().get(pos);
                if (DataType.isAtomic(type)) {
                    target = value.toString();
                }
                else if (type == DataType.TUPLE) {
                    PigTuple nested = new PigTuple(candidate.getSchema());
                    Tuple raw = (value instanceof PigTuple) ? ((PigTuple) value).getTuple() : (Tuple) value;
                    nested.setTuple(raw);
                    target = nested;
                }
                else {
                    Assert.isTrue(false, String.format("Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'", DataType.findTypeName(type), fieldName));
                }
            } catch (ExecException ex) {
                throw new EsHadoopIllegalStateException(String.format("Cannot retrieve field [%s]", fieldName), ex);
            }
            // Only the first field with a matching name is used.
            break;
        }
    }
    return target;
}
 
Example 13
Source File: TypeUtil.java    From phoenix with Apache License 2.0 4 votes vote down vote up
/**
 * Transforms the PhoenixRecord to Pig {@link Tuple}.
 *
 * Each column value is converted according to the Pig type declared by the
 * projected column at the same position; {@code null} values are copied
 * through as {@code null}.
 *
 * @param record record whose values are converted
 * @param projectedColumns field schemas, one per column value and in the same order
 * @return the populated tuple, or {@code null} when the record has no values,
 *         {@code projectedColumns} is {@code null}, or the two differ in length
 * @throws IOException (as a {@link PigException}) when a value cannot be
 *         converted or a column declares an unsupported Pig type; the
 *         original failure is attached as the cause
 */
public static Tuple transformToTuple(final PhoenixPigDBWritable record, final ResourceFieldSchema[] projectedColumns) throws IOException {

    List<Object> columnValues = record.getValues();
    if (columnValues == null || columnValues.isEmpty() || projectedColumns == null || projectedColumns.length != columnValues.size()) {
        return null;
    }
    int columns = columnValues.size();
    Tuple tuple = TupleFactory.getInstance().newTuple(columns);
    try {
        for (int i = 0; i < columns; i++) {
            final ResourceFieldSchema fieldSchema = projectedColumns[i];
            Object object = columnValues.get(i);
            if (object == null) {
                tuple.set(i, null);
                continue;
            }

            switch (fieldSchema.getType()) {
                case DataType.BYTEARRAY: {
                    byte[] bytes = PDataType.fromTypeId(PBinary.INSTANCE.getSqlType()).toBytes(object);
                    tuple.set(i, new DataByteArray(bytes, 0, bytes.length));
                    break;
                }
                case DataType.CHARARRAY:
                    tuple.set(i, DataType.toString(object));
                    break;
                case DataType.DOUBLE:
                    tuple.set(i, DataType.toDouble(object));
                    break;
                case DataType.FLOAT:
                    tuple.set(i, DataType.toFloat(object));
                    break;
                case DataType.INTEGER:
                    tuple.set(i, DataType.toInteger(object));
                    break;
                case DataType.LONG:
                    tuple.set(i, DataType.toLong(object));
                    break;
                case DataType.BOOLEAN:
                    tuple.set(i, DataType.toBoolean(object));
                    break;
                case DataType.DATETIME:
                    tuple.set(i, DataType.toDateTime(object));
                    break;
                default:
                    throw new RuntimeException(String.format(" Not supported [%s] pig type" , fieldSchema));
            }
        }
    } catch (Exception ex) {
        final String errorMsg = String.format(" Error transforming PhoenixRecord to Tuple [%s] ", ex.getMessage());
        // Log and rethrow with the cause attached so the original stack trace is not lost.
        LOG.error(errorMsg, ex);
        throw new PigException(errorMsg, ex);
    }
    return tuple;
}
 
Example 14
Source File: PigUtils.java    From elasticsearch-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the field schema describes a complex Pig type.
 *
 * @param fieldSchema schema to test; may be {@code null}
 * @return {@code true} when the schema is non-null and its type code is at least 100
 */
static boolean isComplexType(ResourceFieldSchema fieldSchema) {
    if (fieldSchema == null) {
        return false;
    }
    // NOTE(review): 100 is presumably the lowest complex type code in Pig's DataType (MAP) — confirm.
    return fieldSchema.getType() >= 100;
}
 
Example 15
Source File: FixedWidthLoader.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Cuts one fixed-width column out of a line and parses it according to
 * the Pig type declared for the field.
 *
 * @param line full input line
 * @param field schema that supplies the target Pig type
 * @param column start and end offsets of the column within the line
 * @return the parsed value; {@code null} when the column lies outside the
 *         line or, for text-like types, when it is blank
 * @throws IOException declared by the signature
 * @throws IllegalArgumentException for map, tuple and bag fields, for an
 *         unrecognised type code, and (as NumberFormatException) for
 *         unparsable numbers
 */
private Object readField(String line, ResourceFieldSchema field, FixedWidthField column)
                         throws IOException, IllegalArgumentException {

    final int from = column.start;
    // The column may extend past a short line; clamp it to the line end.
    final int to = Math.min(column.end, line.length());

    if (from > line.length() || to <= from) {
        return null;
    }

    final String text = line.substring(from, to).trim();
    final byte type = field.getType();

    switch (type) {
        case DataType.UNKNOWN:
        case DataType.BYTEARRAY:
        case DataType.CHARARRAY:
            return text.isEmpty() ? null : text;

        case DataType.BOOLEAN:
            return Boolean.parseBoolean(text);

        case DataType.INTEGER:
            return Integer.parseInt(text);

        case DataType.LONG:
            return Long.parseLong(text);

        case DataType.FLOAT:
            return Float.parseFloat(text);

        case DataType.DOUBLE:
            return Double.parseDouble(text);

        case DataType.DATETIME:
            return (new DateTime(text)).toDateTime(DateTimeZone.UTC);

        case DataType.MAP:
        case DataType.TUPLE:
        case DataType.BAG:
            throw new IllegalArgumentException("Object types (map, tuple, bag) are not supported by FixedWidthLoader");

        default:
            throw new IllegalArgumentException(
                "Unknown type in input schema: " + type);
    }
}
 
Example 16
Source File: PigSchema2Avro.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Check whether Avro type is compatible with Pig type.
 *
 * An Avro UNION and a Pig TUPLE are each compatible with anything.
 * Otherwise the Pig type must be one of the types accepted for the
 * specific Avro type, as listed in the switch below.
 *
 * Each Pig-type alternative is evaluated only for its own Avro type.
 * Earlier code mixed {@code &&} and {@code ||} without parentheses, which
 * made any Pig INTEGER, LONG, FLOAT or BIGCHARARRAY field compatible with
 * every Avro type.
 *
 * @param avroSchema Avro schema to compare
 * @param pigSchema Pig field schema to compare
 * @return {@code true} when the two types are compatible
 */
protected static boolean isCompatible(Schema avroSchema, ResourceFieldSchema pigSchema) {

    Schema.Type avroType = avroSchema.getType();
    byte pigType = pigSchema.getType();

    if (avroType.equals(Schema.Type.UNION)) {
        return true;
    } else if (pigType == DataType.TUPLE) {
        /* Tuple is compatible with any type; for users may want to
           get rid of the tuple wrapper */
        return true;
    }

    switch (avroType) {
    case ARRAY:
        return pigType == DataType.BAG;
    case MAP:
        return pigType == DataType.MAP;
    case STRING:
        return pigType == DataType.CHARARRAY || pigType == DataType.BIGCHARARRAY;
    case ENUM:
        return pigType == DataType.CHARARRAY;
    case BOOLEAN:
        return pigType == DataType.BOOLEAN || pigType == DataType.INTEGER;
    case BYTES:
    case FIXED:
        return pigType == DataType.BYTEARRAY;
    case DOUBLE:
        return pigType == DataType.DOUBLE
                || pigType == DataType.FLOAT
                || pigType == DataType.INTEGER
                || pigType == DataType.LONG;
    case FLOAT:
        return pigType == DataType.FLOAT
                || pigType == DataType.INTEGER
                || pigType == DataType.LONG;
    case INT:
        return pigType == DataType.INTEGER;
    case LONG:
        return pigType == DataType.LONG || pigType == DataType.INTEGER;
    default:
        return false;
    }
}
 
Example 17
Source File: AbstractAccumuloStorage.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Picks the Pig type for a value: the schema's declared type when a schema
 * is given, otherwise the type detected from the object itself.
 *
 * @param o value whose type is wanted
 * @param fieldSchema declared schema for the value; may be {@code null}
 * @return the Pig type code
 */
protected byte schemaToType(Object o, ResourceFieldSchema fieldSchema) {
    if (fieldSchema != null) {
        return fieldSchema.getType();
    }
    return DataType.findType(o);
}
 
Example 18
Source File: PigSchema2Avro.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Check whether Avro type is compatible with Pig type.
 *
 * An Avro UNION and a Pig TUPLE are each compatible with anything.
 * Otherwise the pair must match one of the Avro-type / Pig-type
 * combinations listed in the return expression.
 *
 * Every alternative is explicitly parenthesised. {@code &&} binds tighter
 * than {@code ||}, so without the inner parentheses a Pig INTEGER, LONG,
 * FLOAT or BIGCHARARRAY field would match regardless of the Avro type.
 *
 * @param avroSchema Avro schema to compare
 * @param pigSchema Pig field schema to compare
 * @return {@code true} when the two types are compatible
 */
protected static boolean isCompatible(Schema avroSchema, ResourceFieldSchema pigSchema) {

    Schema.Type avroType = avroSchema.getType();
    byte pigType = pigSchema.getType();

    if (avroType.equals(Schema.Type.UNION)) {
        return true;
    } else if (pigType == DataType.TUPLE) {
        /* Tuple is compatible with any type; for users may want to
           get rid of the tuple wrapper */
        return true;
    }
    return  (avroType.equals(Schema.Type.ARRAY) && pigType == DataType.BAG)
                  || (avroType.equals(Schema.Type.MAP) && pigType == DataType.MAP)
                  || (avroType.equals(Schema.Type.STRING)
                                                  && (pigType == DataType.CHARARRAY
                                                      || pigType == DataType.BIGCHARARRAY))
                  || (avroType.equals(Schema.Type.ENUM)
                                                  && pigType == DataType.CHARARRAY)
                  || (avroType.equals(Schema.Type.BOOLEAN)
                                                  && (pigType == DataType.BOOLEAN
                                                      || pigType == DataType.INTEGER))
                  || (avroType.equals(Schema.Type.BYTES)
                                                  && pigType == DataType.BYTEARRAY)
                  || (avroType.equals(Schema.Type.DOUBLE)
                                                  && (pigType == DataType.DOUBLE
                                                      || pigType == DataType.FLOAT
                                                      || pigType == DataType.INTEGER
                                                      || pigType == DataType.LONG))
                  || (avroType.equals(Schema.Type.FLOAT)
                                                  && (pigType == DataType.FLOAT
                                                      || pigType == DataType.INTEGER
                                                      || pigType == DataType.LONG))
                  || (avroType.equals(Schema.Type.FIXED)
                                                  && pigType == DataType.BYTEARRAY)
                  || (avroType.equals(Schema.Type.INT)
                                                  && pigType == DataType.INTEGER)
                  || (avroType.equals(Schema.Type.LONG)
                                                  && (pigType == DataType.LONG
                                                      || pigType == DataType.INTEGER));

}