Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema()
The following examples show how to use
org.apache.pig.ResourceSchema.ResourceFieldSchema#getSchema().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 6 votes |
/** * Convert an Avro schema to a Pig schema */ public static ResourceSchema convert(Schema schema) throws IOException { if (AvroStorageUtils.containsGenericUnion(schema)) throw new IOException ("We don't accept schema containing generic unions."); Set<Schema> visitedRecords = new HashSet<Schema>(); ResourceFieldSchema inSchema = inconvert(schema, FIELD, visitedRecords); ResourceSchema tupleSchema; if (inSchema.getType() == DataType.TUPLE) { tupleSchema = inSchema.getSchema(); } else { // other typs ResourceFieldSchema tupleWrapper = AvroStorageUtils.wrapAsTuple(inSchema); ResourceSchema topSchema = new ResourceSchema(); topSchema.setFields(new ResourceFieldSchema[] { tupleWrapper }); tupleSchema = topSchema; } return tupleSchema; }
Example 2
Source File: AvroSchema2Pig.java From Cubert with Apache License 2.0 | 5 votes |
/** * Convert an Avro schema to a Pig schema */ public static ResourceSchema convert(Schema schema) throws IOException { if (AvroStorageUtils.containsGenericUnion(schema)) throw new IOException("We don't accept schema containing generic unions."); Set<Schema> visitedRecords = new HashSet<Schema>(); ResourceFieldSchema inSchema = inconvert(schema, FIELD, visitedRecords); ResourceSchema tupleSchema; if (inSchema.getType() == DataType.TUPLE) { tupleSchema = inSchema.getSchema(); } else { // other typs ResourceFieldSchema tupleWrapper = AvroStorageUtils.wrapAsTuple(inSchema); ResourceSchema topSchema = new ResourceSchema(); topSchema.setFields(new ResourceFieldSchema[] { tupleWrapper }); tupleSchema = topSchema; } return tupleSchema; }
Example 3
Source File: OrcStorage.java From spork with Apache License 2.0 | 5 votes |
@Override public ResourceSchema getSchema(String location, Job job) throws IOException { if (typeInfo == null) { typeInfo = getTypeInfo(location, job); // still null means case of multiple load store if (typeInfo == null) { return null; } } ResourceFieldSchema fs = OrcUtils.getResourceFieldSchema(typeInfo); return fs.getSchema(); }
Example 4
Source File: Schema.java From spork with Apache License 2.0 | 5 votes |
public static Schema getPigSchema(ResourceSchema rSchema) throws FrontendException { if(rSchema == null) { return null; } List<FieldSchema> fsList = new ArrayList<FieldSchema>(); for(ResourceFieldSchema rfs : rSchema.getFields()) { FieldSchema fs = new FieldSchema(rfs.getName(), rfs.getSchema() == null ? null : getPigSchema(rfs.getSchema()), rfs.getType()); if(rfs.getType() == DataType.BAG) { if (fs.schema != null) { // allow partial schema if (fs.schema.size() == 1) { FieldSchema innerFs = fs.schema.getField(0); if (innerFs.type != DataType.TUPLE) { ResourceFieldSchema.throwInvalidSchemaException(); } } else { ResourceFieldSchema.throwInvalidSchemaException(); } } } fsList.add(fs); } return new Schema(fsList); }
Example 5
Source File: PigValueWriter.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/** * Checks to see if the given field is a schema-less Map that has values. * @return true if Map has no schema but has values (mixed schema map). false if not a Map or if Map is just empty. */ private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) { if (schema.getType() != DataType.MAP) { // Can't be a mixed value map if it's not a map at all. return false; } try { Object fieldValue = object.get(field); Map<?, ?> map = (Map<?, ?>) fieldValue; return schema.getSchema() == null && !(map == null || map.isEmpty()); } catch (ExecException e) { throw new EsHadoopIllegalStateException(e); } }
Example 6
Source File: PigBytesConverter.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
@Override public void convert(Object from, BytesArray to) { // expect PigTuple holding a Tuple with only one field - chararray or bytearray Assert.isTrue(from instanceof PigTuple, String.format("Unexpected object type, expecting [%s], given [%s]", PigTuple.class, from.getClass())); PigTuple pt = (PigTuple) from; ResourceFieldSchema schema = pt.getSchema(); // unwrap the tuple ResourceSchema tupleSchema = schema.getSchema(); // empty tuple shortcut if (tupleSchema == null) { // write empty doc to.bytes("{}"); return; } ResourceFieldSchema[] fields = tupleSchema.getFields(); Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected"); Object object; byte type; try { object = pt.getTuple().get(0); type = pt.getTuple().getType(0); } catch (Exception ex) { throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex); } if (type == DataType.BIGCHARARRAY || type == DataType.CHARARRAY) { to.bytes(object.toString()); return; } if (type == DataType.BYTEARRAY) { DataByteArray dba = (DataByteArray) object; to.bytes(dba.get(), dba.size()); return; } throw new EsHadoopIllegalArgumentException(String.format("Cannot handle Pig type [%s]; expecting [%s,%s]", object.getClass(), String.class, DataByteArray.class)); }
Example 7
Source File: PigFieldExtractor.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
/**
 * Resolves the configured field names, one after another, starting from
 * {@code target}.
 *
 * <p>For each name the current target must be a {@code PigTuple}; otherwise
 * {@code NOT_FOUND} is returned. The first schema field whose name matches
 * replaces the target: atomic values become their string form, tuples are
 * re-wrapped in a {@code PigTuple} carrying the matched field's schema. If no
 * field matches a name, the target is left unchanged and the next name is
 * resolved against it.
 *
 * @param target the object to start the lookup from
 * @return the resolved value, or {@code NOT_FOUND} when a non-tuple target is
 *         encountered while names remain
 */
@Override
protected Object extractField(Object target) {
    for (String fieldName : getFieldNames()) {
        if (!(target instanceof PigTuple)) {
            return NOT_FOUND;
        }

        PigTuple pt = (PigTuple) target;
        ResourceFieldSchema tupleFieldSchema = pt.getSchema();
        ResourceFieldSchema[] fields = tupleFieldSchema.getSchema().getFields();

        for (int i = 0; i < fields.length; i++) {
            ResourceFieldSchema field = fields[i];
            if (!fieldName.equals(field.getName())) {
                continue;
            }

            byte type = field.getType();
            try {
                Object object = pt.getTuple().get(i);
                if (DataType.isAtomic(type)) {
                    target = object.toString();
                } else if (type == DataType.TUPLE) {
                    PigTuple rpt = new PigTuple(field.getSchema());
                    Tuple inner = (object instanceof PigTuple)
                            ? ((PigTuple) object).getTuple()
                            : (Tuple) object;
                    rpt.setTuple(inner);
                    target = rpt;
                } else {
                    Assert.isTrue(false, String.format("Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'", DataType.findTypeName(type), fieldName));
                }
            } catch (ExecException ex) {
                throw new EsHadoopIllegalStateException(String.format("Cannot retrieve field [%s]", fieldName), ex);
            }

            // Only the first field with a matching name is considered.
            break;
        }
    }
    return target;
}