org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema

Source File: AvroSchema2Pig.java From spork with Apache License 2.0

6 votes

/**
 * Converts an Avro schema into a Pig {@link ResourceSchema}.
 *
 * <p>If the converted top-level field is a tuple, that tuple's own schema is returned
 * directly. Any other top-level type is wrapped with
 * {@code AvroStorageUtils.wrapAsTuple} and returned as the single field of a new schema.
 *
 * @param schema the Avro schema to convert
 * @return the resulting Pig schema
 * @throws IOException if the schema contains a generic union, or if one of the
 *         conversion helpers raises it
 */
public static ResourceSchema convert(Schema schema) throws IOException {
    if (AvroStorageUtils.containsGenericUnion(schema)) {
        throw new IOException ("We don't accept schema containing generic unions.");
    }

    Set<Schema> seenRecords = new HashSet<Schema>();
    ResourceFieldSchema converted = inconvert(schema, FIELD, seenRecords);

    // A tuple already carries the schema we want to hand back.
    if (converted.getType() == DataType.TUPLE) {
        return converted.getSchema();
    }

    // Other types: wrap the field in a tuple and expose it as the only field.
    ResourceFieldSchema wrapper = AvroStorageUtils.wrapAsTuple(converted);
    ResourceSchema wrappedSchema = new ResourceSchema();
    wrappedSchema.setFields(new ResourceFieldSchema[] { wrapper });
    return wrappedSchema;
}

Source File: AvroSchema2Pig.java From Cubert with Apache License 2.0

5 votes

/**
 * Converts an Avro schema into a Pig {@link ResourceSchema}.
 *
 * <p>If the converted top-level field is a tuple, that tuple's own schema is returned
 * directly. Any other top-level type is wrapped with
 * {@code AvroStorageUtils.wrapAsTuple} and returned as the single field of a new schema.
 *
 * @param schema the Avro schema to convert
 * @return the resulting Pig schema
 * @throws IOException if the schema contains a generic union, or if one of the
 *         conversion helpers raises it
 */
public static ResourceSchema convert(Schema schema) throws IOException
{
    if (AvroStorageUtils.containsGenericUnion(schema))
    {
        throw new IOException("We don't accept schema containing generic unions.");
    }

    Set<Schema> seenRecords = new HashSet<Schema>();
    ResourceFieldSchema converted = inconvert(schema, FIELD, seenRecords);

    // A tuple already carries the schema we want to hand back.
    if (converted.getType() == DataType.TUPLE)
    {
        return converted.getSchema();
    }

    // Other types: wrap the field in a tuple and expose it as the only field.
    ResourceFieldSchema wrapper = AvroStorageUtils.wrapAsTuple(converted);
    ResourceSchema wrappedSchema = new ResourceSchema();
    wrappedSchema.setFields(new ResourceFieldSchema[] { wrapper });
    return wrappedSchema;
}

Source File: OrcStorage.java From spork with Apache License 2.0

5 votes

/**
 * Returns the Pig schema derived from this storage's type information.
 *
 * <p>The type information is looked up through {@code getTypeInfo(location, job)} the
 * first time it is needed and kept in the {@code typeInfo} field afterwards.
 *
 * @param location the location passed on to {@code getTypeInfo}
 * @param job the job passed on to {@code getTypeInfo}
 * @return the schema of the field built from the type information, or {@code null}
 *         when no type information could be obtained
 * @throws IOException if raised while resolving the type information
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    // still null means case of multiple load store
    if (typeInfo == null) {
        return null;
    }

    return OrcUtils.getResourceFieldSchema(typeInfo).getSchema();
}

Source File: Schema.java From spork with Apache License 2.0

5 votes

/**
 * Builds a Pig {@link Schema} from a {@link ResourceSchema}, converting nested
 * schemas recursively.
 *
 * <p>A bag field that carries a schema must contain exactly one inner field, and that
 * field must be a tuple; otherwise
 * {@code ResourceFieldSchema.throwInvalidSchemaException()} is invoked. A bag without
 * a schema is accepted (partial schema).
 *
 * @param rSchema the resource schema to convert; may be {@code null}
 * @return the converted schema, or {@code null} if {@code rSchema} is {@code null}
 * @throws FrontendException if a field schema cannot be built or a bag schema is invalid
 */
public static Schema getPigSchema(ResourceSchema rSchema) throws FrontendException {
    if (rSchema == null) {
        return null;
    }

    List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (ResourceFieldSchema resourceField : rSchema.getFields()) {
        ResourceSchema nested = resourceField.getSchema();
        Schema nestedPigSchema = (nested == null) ? null : getPigSchema(nested);
        FieldSchema pigField = new FieldSchema(
                resourceField.getName(), nestedPigSchema, resourceField.getType());

        // allow partial schema: only validate bags that actually declare one
        boolean bagWithSchema =
                resourceField.getType() == DataType.BAG && pigField.schema != null;
        if (bagWithSchema) {
            boolean wrapsSingleTuple = pigField.schema.size() == 1
                    && pigField.schema.getField(0).type == DataType.TUPLE;
            if (!wrapsSingleTuple) {
                ResourceFieldSchema.throwInvalidSchemaException();
            }
        }

        converted.add(pigField);
    }
    return new Schema(converted);
}

Source File: PigValueWriter.java From elasticsearch-hadoop with Apache License 2.0

5 votes

/**
 * Tells whether the given tuple field is a Map without a schema that nevertheless
 * holds entries (a mixed schema map).
 *
 * @param schema the schema of the field being inspected
 * @param field the position of the field inside the tuple
 * @param object the tuple the field value is read from
 * @return {@code true} if the field is a Map with no schema and at least one entry;
 *         {@code false} if it is not a Map, has a schema, or the Map is null or empty
 */
private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) {
    if (schema.getType() == DataType.MAP) {
        try {
            Map<?, ?> entries = (Map<?, ?>) object.get(field);
            return schema.getSchema() == null && entries != null && !entries.isEmpty();
        } catch (ExecException e) {
            throw new EsHadoopIllegalStateException(e);
        }
    }

    // Can't be a mixed value map if it's not a map at all.
    return false;
}

Source File: PigBytesConverter.java From elasticsearch-hadoop with Apache License 2.0

4 votes

/**
 * Writes the single field of a {@link PigTuple} into the given {@link BytesArray}.
 *
 * <p>The tuple is expected to hold exactly one field of type chararray or bytearray.
 * A tuple without a schema is written as an empty JSON document ({@code {}}).
 *
 * @param from the object to convert; must be a {@link PigTuple}
 * @param to the destination receiving the bytes
 * @throws EsHadoopIllegalStateException if the tuple field cannot be read
 * @throws EsHadoopIllegalArgumentException if the field is of any other Pig type,
 *         including a null field value
 */
@Override
public void convert(Object from, BytesArray to) {

    // expect PigTuple holding a Tuple with only one field - chararray or bytearray
    if (!(from instanceof PigTuple)) {
        // Build the message only on failure, and null-safely: calling from.getClass()
        // eagerly would raise a NullPointerException before the assertion could report.
        Assert.isTrue(false,
                String.format("Unexpected object type, expecting [%s], given [%s]", PigTuple.class,
                        from == null ? null : from.getClass()));
    }

    PigTuple pt = (PigTuple) from;
    ResourceFieldSchema schema = pt.getSchema();

    // unwrap the tuple
    ResourceSchema tupleSchema = schema.getSchema();

    // empty tuple shortcut
    if (tupleSchema == null) {
        // write empty doc
        to.bytes("{}");
        return;
    }

    ResourceFieldSchema[] fields = tupleSchema.getFields();
    Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected");

    Object object;
    byte type;

    try {
        object = pt.getTuple().get(0);
        type = pt.getTuple().getType(0);
    } catch (Exception ex) {
        throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
    }

    if (type == DataType.BIGCHARARRAY || type == DataType.CHARARRAY) {
        to.bytes(object.toString());
        return;
    }
    if (type == DataType.BYTEARRAY) {
        DataByteArray dba = (DataByteArray) object;
        to.bytes(dba.get(), dba.size());
        return;
    }

    // The field value may be null here, so resolve its class null-safely; otherwise
    // the intended exception would be masked by a NullPointerException.
    throw new EsHadoopIllegalArgumentException(String.format("Cannot handle Pig type [%s]; expecting [%s,%s]",
            object == null ? null : object.getClass(), String.class, DataByteArray.class));
}

Source File: PigFieldExtractor.java From elasticsearch-hadoop with Apache License 2.0

4 votes

/**
 * Resolves the configured field names, one level at a time, against the given target.
 *
 * <p>For each name the current target must be a {@link PigTuple} whose schema declares
 * a field with that name. An atomic field is replaced by its string form; a tuple
 * field is re-wrapped in a new {@link PigTuple} so the next name can be resolved
 * inside it.
 *
 * @param target the object to extract the field from
 * @return the extracted value, or {@code NOT_FOUND} when the current target is not a
 *         {@link PigTuple} or its schema has no field with the requested name
 * @throws EsHadoopIllegalStateException if the field value cannot be read from the tuple
 */
@Override
protected Object extractField(Object target) {
    List<String> fieldNames = getFieldNames();
    for (int index = 0; index < fieldNames.size(); index++) {
        String fieldName = fieldNames.get(index);
        if (target instanceof PigTuple) {
            PigTuple pt = (PigTuple) target;
            ResourceFieldSchema[] fields = pt.getSchema().getSchema().getFields();

            boolean foundField = false;
            for (int i = 0; i < fields.length && !foundField; i++) {
                ResourceFieldSchema field = fields[i];
                if (fieldName.equals(field.getName())) {
                    foundField = true;
                    byte type = field.getType();
                    try {
                        Object object = pt.getTuple().get(i);
                        if (DataType.isAtomic(type)) {
                            // NOTE(review): a null value would fail on toString() here -
                            // confirm whether null fields can reach this point.
                            target = object.toString();
                        }
                        else if (type == DataType.TUPLE) {
                            PigTuple rpt = new PigTuple(field.getSchema());
                            if (object instanceof PigTuple) {
                                rpt.setTuple(((PigTuple) object).getTuple());
                            }
                            else {
                                rpt.setTuple((Tuple) object);
                            }
                            target = rpt;
                        }
                        else {
                            Assert.isTrue(false, String.format("Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'", DataType.findTypeName(type), fieldName));
                        }
                    } catch (ExecException ex) {
                        throw new EsHadoopIllegalStateException(String.format("Cannot retrieve field [%s]", fieldName), ex);
                    }
                }
            }

            // A name missing from the schema must not fall through: otherwise the
            // enclosing tuple itself would be returned (or searched for the next
            // nested name) as if it were the requested value.
            if (!foundField) {
                return NOT_FOUND;
            }
        }
        else {
            return NOT_FOUND;
        }
    }
    return target;
}

Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema()