Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema()

The following examples show how to use org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Translates an Avro schema into a Pig {@code ResourceSchema}.
 *
 * <p>The Avro schema is first converted to a single Pig field. When that field is a
 * tuple, its nested schema is returned directly; any other type is wrapped in a tuple
 * and returned as the only field of a new schema.
 *
 * @param schema the Avro schema to translate
 * @return the resulting Pig schema
 * @throws IOException if {@code AvroStorageUtils.containsGenericUnion} reports that the
 *         schema contains a generic union
 */
public static ResourceSchema convert(Schema schema) throws IOException {

    if (AvroStorageUtils.containsGenericUnion(schema)) {
        throw new IOException ("We don't accept schema containing generic unions.");
    }

    ResourceFieldSchema converted = inconvert(schema, FIELD, new HashSet<Schema>());

    // A tuple already carries the schema we want to expose.
    if (converted.getType() == DataType.TUPLE) {
        return converted.getSchema();
    }

    // Any other type: wrap the field in a tuple and expose it as a one-field schema.
    ResourceFieldSchema wrapped = AvroStorageUtils.wrapAsTuple(converted);
    ResourceSchema singleField = new ResourceSchema();
    singleField.setFields(new ResourceFieldSchema[] { wrapped });
    return singleField;
}
 
Example 2
Source File: AvroSchema2Pig.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Pig {@code ResourceSchema} that corresponds to an Avro schema.
 *
 * <p>A converted field of type tuple contributes its nested schema as the result.
 * A converted field of any other type is wrapped as a tuple, and that wrapper becomes
 * the sole field of a freshly created schema.
 *
 * @param schema the Avro schema to convert
 * @return the Pig schema derived from {@code schema}
 * @throws IOException if the schema is reported to contain a generic union
 */
public static ResourceSchema convert(Schema schema) throws IOException
{
    if (AvroStorageUtils.containsGenericUnion(schema))
    {
        throw new IOException("We don't accept schema containing generic unions.");
    }

    Set<Schema> seenRecords = new HashSet<Schema>();
    ResourceFieldSchema pigField = inconvert(schema, FIELD, seenRecords);

    if (pigField.getType() != DataType.TUPLE)
    {
        // non-tuple types are wrapped so the caller always receives a tuple-shaped schema
        ResourceFieldSchema asTuple = AvroStorageUtils.wrapAsTuple(pigField);
        ResourceSchema wrapperSchema = new ResourceSchema();
        wrapperSchema.setFields(new ResourceFieldSchema[] { asTuple });
        return wrapperSchema;
    }

    return pigField.getSchema();
}
 
Example 3
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the Pig schema derived from the type information of this storage.
 *
 * <p>The type information is looked up via {@code getTypeInfo(location, job)} only when
 * it has not been set yet, and the result is kept in the {@code typeInfo} field.
 *
 * @param location the location passed through to {@code getTypeInfo}
 * @param job the job passed through to {@code getTypeInfo}
 * @return the nested schema of the field schema built from the type information, or
 *         {@code null} when no type information could be obtained
 * @throws IOException declared by the signature; the visible body does not throw it itself
 */
@Override
public ResourceSchema getSchema(String location, Job job)
        throws IOException {
    if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    // still null means case of multiple load store
    if (typeInfo == null) {
        return null;
    }
    return OrcUtils.getResourceFieldSchema(typeInfo).getSchema();
}
 
Example 4
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code ResourceSchema} into a Pig {@code Schema}, recursing into nested schemas.
 *
 * <p>A bag field may have no schema at all (partial schema). If it does have one, that
 * schema must consist of exactly one field of type tuple; otherwise
 * {@code ResourceFieldSchema.throwInvalidSchemaException()} is invoked.
 *
 * @param rSchema the resource schema to convert; may be {@code null}
 * @return the converted schema, or {@code null} when {@code rSchema} is {@code null}
 * @throws FrontendException declared by the signature; presumably raised for an invalid
 *         bag schema or by the nested conversion
 */
public static Schema getPigSchema(ResourceSchema rSchema)
throws FrontendException {
    if (rSchema == null) {
        return null;
    }

    List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (ResourceFieldSchema resourceField : rSchema.getFields()) {
        ResourceSchema nested = resourceField.getSchema();
        Schema nestedPigSchema = (nested == null) ? null : getPigSchema(nested);
        FieldSchema pigField =
                new FieldSchema(resourceField.getName(), nestedPigSchema, resourceField.getType());

        // a bag without a schema is allowed (partial schema); one with a schema must hold a single tuple
        if (resourceField.getType() == DataType.BAG && pigField.schema != null) {
            boolean holdsSingleTuple = pigField.schema.size() == 1
                    && pigField.schema.getField(0).type == DataType.TUPLE;
            if (!holdsSingleTuple) {
                ResourceFieldSchema.throwInvalidSchemaException();
            }
        }
        converted.add(pigField);
    }
    return new Schema(converted);
}
 
Example 5
Source File: PigValueWriter.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the tuple field at the given position is a map that has no schema
 * but does contain entries.
 *
 * @param schema the field schema describing the tuple field
 * @param field the position of the field inside {@code object}
 * @param object the tuple holding the value
 * @return {@code true} only for a map-typed field without a nested schema whose value is a
 *         non-null, non-empty map; {@code false} for non-map fields and for null or empty maps
 */
private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) {
    if (schema.getType() == DataType.MAP) {
        Map<?, ?> entries;
        try {
            entries = (Map<?, ?>) object.get(field);
        } catch (ExecException e) {
            throw new EsHadoopIllegalStateException(e);
        }
        return schema.getSchema() == null && entries != null && !entries.isEmpty();
    }
    // Not a map, so it cannot be a mixed value map.
    return false;
}
 
Example 6
Source File: PigBytesConverter.java    From elasticsearch-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the single JSON-carrying field of a {@link PigTuple} into the given byte array.
 *
 * <p>When the tuple's field schema has no nested schema, an empty JSON document
 * ({@code {}}) is written. Otherwise the nested schema must declare exactly one field.
 * A chararray or big chararray value is written through its {@code toString()}, and a
 * bytearray value is written as the bytes of its {@code DataByteArray}.
 *
 * <p>Both diagnostic messages are built null-safely, so a {@code null} argument or a
 * {@code null} field value reaches the intended error instead of failing earlier with a
 * {@code NullPointerException}.
 *
 * @param from the object to convert; expected to be a {@code PigTuple}
 * @param to the destination receiving the bytes
 * @throws EsHadoopIllegalStateException if the tuple value or its type cannot be read
 * @throws EsHadoopIllegalArgumentException if the field is neither a chararray nor a bytearray
 */
@Override
public void convert(Object from, BytesArray to) {

    // expect PigTuple holding a Tuple with only one field - chararray or bytearray
    // The message is formatted eagerly, before the check runs, so it must tolerate a null argument.
    Assert.isTrue(from instanceof PigTuple,
            String.format("Unexpected object type, expecting [%s], given [%s]", PigTuple.class,
                    from == null ? null : from.getClass()));

    PigTuple pt = (PigTuple) from;
    ResourceFieldSchema schema = pt.getSchema();

    // unwrap the tuple
    ResourceSchema tupleSchema = schema.getSchema();

    // empty tuple shortcut
    if (tupleSchema == null) {
        // write empty doc
        to.bytes("{}");
        return;
    }

    ResourceFieldSchema[] fields = tupleSchema.getFields();
    Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected");

    Object object;
    byte type;

    try {
        object = pt.getTuple().get(0);
        type = pt.getTuple().getType(0);
    } catch (Exception ex) {
        throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
    }

    if (type == DataType.BIGCHARARRAY || type == DataType.CHARARRAY) {
        to.bytes(object.toString());
        return;
    }
    if (type == DataType.BYTEARRAY) {
        DataByteArray dba = (DataByteArray) object;
        to.bytes(dba.get(), dba.size());
        return;
    }

    // A null field value must still produce the descriptive error below, not a NullPointerException.
    throw new EsHadoopIllegalArgumentException(String.format("Cannot handle Pig type [%s]; expecting [%s,%s]",
            object == null ? null : object.getClass(), String.class, DataByteArray.class));
}
 
Example 7
Source File: PigFieldExtractor.java    From elasticsearch-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Walks the configured field names, descending one level into the target per name.
 *
 * <p>At each step the current target must be a {@code PigTuple}; otherwise
 * {@code NOT_FOUND} is returned. The first schema field whose name equals the current
 * field name is selected: an atomic value replaces the target with its string form,
 * and a tuple value replaces it with a new {@code PigTuple} built from that field's
 * schema. If no schema field matches the name, the target is left unchanged for that step.
 *
 * @param target the object to extract from
 * @return the extracted value, or {@code NOT_FOUND} when a step meets a non-{@code PigTuple} target
 * @throws EsHadoopIllegalStateException if reading the value from the tuple fails
 */
@Override
protected Object extractField(Object target) {
    for (String fieldName : getFieldNames()) {
        if (!(target instanceof PigTuple)) {
            return NOT_FOUND;
        }

        PigTuple current = (PigTuple) target;
        ResourceFieldSchema[] candidates = current.getSchema().getSchema().getFields();

        for (int pos = 0; pos < candidates.length; pos++) {
            ResourceFieldSchema candidate = candidates[pos];
            if (!fieldName.equals(candidate.getName())) {
                continue;
            }

            byte type = candidate.getType();
            try {
                Object value = current.getTuple().get(pos);
                if (DataType.isAtomic(type)) {
                    target = value.toString();
                }
                else if (type == DataType.TUPLE) {
                    PigTuple nested = new PigTuple(candidate.getSchema());
                    // the value may already be wrapped; always hand the raw tuple to the new wrapper
                    Tuple raw = (value instanceof PigTuple) ? ((PigTuple) value).getTuple() : (Tuple) value;
                    nested.setTuple(raw);
                    target = nested;
                }
                else {
                    Assert.isTrue(false, String.format("Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'", DataType.findTypeName(type), fieldName));
                }
            } catch (ExecException ex) {
                throw new EsHadoopIllegalStateException(String.format("Cannot retrieve field [%s]", fieldName), ex);
            }
            // only the first field with a matching name is considered
            break;
        }
    }
    return target;
}