Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#setType()
The following examples show how to use
org.apache.pig.ResourceSchema.ResourceFieldSchema#setType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GenRandomData.java From spork with Apache License 2.0 | 6 votes |
/**
 * Builds the field schema of a bag holding tuples with {@code column} fields.
 *
 * <p>All tuple slots share one INTEGER field-schema instance.
 * NOTE(review): the method name says "Float" but the element type set here is
 * INTEGER -- confirm against the matching data generator before changing it.
 *
 * @param column number of fields in the inner tuple
 * @return a BAG field schema whose only field is the TUPLE field schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getFloatDataBagFieldSchema(int column) throws IOException {
    ResourceFieldSchema elementFs = new ResourceFieldSchema();
    elementFs.setType(DataType.INTEGER);

    // Every slot of the tuple points at the same element schema object.
    ResourceFieldSchema[] tupleFields = new ResourceFieldSchema[column];
    java.util.Arrays.fill(tupleFields, elementFs);

    ResourceSchema innerTuple = new ResourceSchema();
    innerTuple.setFields(tupleFields);

    ResourceFieldSchema tupleFs = new ResourceFieldSchema();
    tupleFs.setSchema(innerTuple);
    tupleFs.setType(DataType.TUPLE);

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] {tupleFs});

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(bagContents);
    result.setType(DataType.BAG);
    return result;
}
Example 2
Source File: GenRandomData.java From spork with Apache License 2.0 | 6 votes |
/**
 * Builds the field schema of an eleven-field tuple mixing scalar types.
 *
 * <p>Field order: chararray, long, int, double, float, chararray, int, double,
 * float, boolean, datetime. Repeated types reuse the same field-schema instance.
 *
 * @return a TUPLE field schema carrying the mixed-type inner schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getMixedTupleToConvertFieldSchema() throws IOException {
    ResourceFieldSchema chararrayFs = new ResourceFieldSchema();
    chararrayFs.setType(DataType.CHARARRAY);

    ResourceFieldSchema integerFs = new ResourceFieldSchema();
    integerFs.setType(DataType.INTEGER);

    ResourceFieldSchema longFs = new ResourceFieldSchema();
    longFs.setType(DataType.LONG);

    ResourceFieldSchema floatFs = new ResourceFieldSchema();
    floatFs.setType(DataType.FLOAT);

    ResourceFieldSchema doubleFs = new ResourceFieldSchema();
    doubleFs.setType(DataType.DOUBLE);

    ResourceFieldSchema booleanFs = new ResourceFieldSchema();
    booleanFs.setType(DataType.BOOLEAN);

    ResourceFieldSchema datetimeFs = new ResourceFieldSchema();
    datetimeFs.setType(DataType.DATETIME);

    ResourceFieldSchema[] layout = {
        chararrayFs, longFs, integerFs, doubleFs, floatFs,
        chararrayFs, integerFs, doubleFs, floatFs,
        booleanFs, datetimeFs
    };
    ResourceSchema inner = new ResourceSchema();
    inner.setFields(layout);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(inner);
    result.setType(DataType.TUPLE);
    return result;
}
Example 3
Source File: AegisthusLoader.java From aegisthus with Apache License 2.0 | 6 votes |
/**
 * Builds a schema with a single TUPLE field named {@code "column"}.
 *
 * <p>The tuple's fields are, in order: name, value, ts, status, ttl.
 *
 * @return the schema wrapping the "column" tuple field
 * @throws IOException propagated from the schema construction calls
 */
protected ResourceSchema columnSchema() throws IOException {
    ResourceFieldSchema[] columnParts = {
        field("name", DataType.BYTEARRAY),
        field("value", DataType.BYTEARRAY),
        field("ts", DataType.LONG),
        field("status", DataType.CHARARRAY),
        field("ttl", DataType.LONG)
    };

    ResourceSchema inner = new ResourceSchema();
    inner.setFields(columnParts);

    ResourceFieldSchema column = new ResourceFieldSchema();
    column.setName("column");
    column.setType(DataType.TUPLE);
    column.setSchema(inner);

    ResourceSchema outer = new ResourceSchema();
    outer.setFields(new ResourceFieldSchema[] {column});
    return outer;
}
Example 4
Source File: AvroStorageUtils.java From spork with Apache License 2.0 | 5 votes |
/**
 * Wraps a Pig field schema in a one-field tuple.
 *
 * @param subFieldSchema the field schema to place inside the tuple
 * @return a TUPLE field schema named {@code PIG_TUPLE_WRAPPER} whose inner
 *         schema holds only {@code subFieldSchema}
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceFieldSchema[] onlyField = {subFieldSchema};
    ResourceSchema inner = new ResourceSchema();
    inner.setFields(onlyField);

    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    wrapper.setType(DataType.TUPLE);
    wrapper.setName(PIG_TUPLE_WRAPPER);
    wrapper.setSchema(inner);
    return wrapper;
}
Example 5
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG field schema whose tuples follow
 * {@link #getSmallBagTextTupleFieldSchema()}.
 *
 * @return the BAG field schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagMembers = {getSmallBagTextTupleFieldSchema()};

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(bagMembers);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(bagContents);
    result.setType(DataType.BAG);
    return result;
}
Example 6
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG field schema whose tuples follow
 * {@link #getSmallTupleFieldSchema()}.
 *
 * @return the BAG field schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getSmallTupDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagMembers = {getSmallTupleFieldSchema()};

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(bagMembers);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(bagContents);
    result.setType(DataType.BAG);
    return result;
}
Example 7
Source File: Loader.java From logparser with Apache License 2.0 | 5 votes |
/**
 * Derives the Pig schema from the requested field names.
 *
 * <p>Type selection per field:
 * <ul>
 *   <li>name ends with {@code ".*"}: MAP</li>
 *   <li>no casts reported by the record reader: BYTEARRAY</li>
 *   <li>casts include LONG: LONG</li>
 *   <li>otherwise casts include DOUBLE: DOUBLE</li>
 *   <li>anything else: CHARARRAY</li>
 * </ul>
 * Each field's description is set to its name.
 *
 * @param location not read by this method
 * @param job not read by this method
 * @return a schema with one field per requested field, in iteration order
 * @throws IOException propagated from the record reader or schema calls
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    List<ResourceFieldSchema> columns = new ArrayList<>();

    for (String requested : requestedFields) {
        ResourceFieldSchema column = new ResourceFieldSchema();
        column.setName(requested);
        column.setDescription(requested);

        if (requested.endsWith(".*")) {
            column.setType(DataType.MAP);
        } else {
            EnumSet<Casts> allowed = theInputFormat.getRecordReader().getCasts(requested);
            if (allowed == null) {
                column.setType(DataType.BYTEARRAY);
            } else if (allowed.contains(Casts.LONG)) {
                column.setType(DataType.LONG);
            } else if (allowed.contains(Casts.DOUBLE)) {
                column.setType(DataType.DOUBLE);
            } else {
                column.setType(DataType.CHARARRAY);
            }
        }

        columns.add(column);
    }

    ResourceSchema result = new ResourceSchema();
    result.setFields(columns.toArray(new ResourceFieldSchema[columns.size()]));
    return result;
}
Example 8
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a MAP field schema whose inner schema has one BYTEARRAY field.
 *
 * @return the MAP field schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getRandMapFieldSchema() throws IOException {
    ResourceFieldSchema valueFs = new ResourceFieldSchema();
    valueFs.setType(DataType.BYTEARRAY);

    ResourceSchema mapContents = new ResourceSchema();
    mapContents.setFields(new ResourceFieldSchema[] {valueFs});

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(mapContents);
    result.setType(DataType.MAP);
    return result;
}
Example 9
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG field schema whose tuples follow {@code getTupleFieldSchema()}.
 *
 * @return the BAG field schema
 * @throws IOException propagated from the schema construction calls
 */
public ResourceFieldSchema getBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagMembers = {getTupleFieldSchema()};

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(bagMembers);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(bagContents);
    result.setType(DataType.BAG);
    return result;
}
Example 10
Source File: OrcStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Wraps the given schema in a TUPLE field schema, derives {@code typeInfo}
 * from it and stores the serialized result in this class's UDF properties
 * under the key {@code signature + SchemaSignatureSuffix}.
 *
 * @param rs the schema to wrap and record
 * @throws IOException propagated from the schema or serialization calls
 */
@Override
public void checkSchema(ResourceSchema rs) throws IOException {
    ResourceFieldSchema rowSchema = new ResourceFieldSchema();
    rowSchema.setType(DataType.TUPLE);
    rowSchema.setSchema(rs);

    typeInfo = OrcUtils.getTypeInfo(rowSchema);

    String propertyKey = signature + SchemaSignatureSuffix;
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    udfProps.setProperty(propertyKey, ObjectSerializer.serialize(typeInfo));
}
Example 11
Source File: JsonStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Replaces every NULL-typed field of the schema with BYTEARRAY, in place.
 *
 * @param s the schema to adjust; its field objects are mutated
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s) {
    for (ResourceFieldSchema field : s.getFields()) {
        if (field.getType() != DataType.NULL) {
            continue;
        }
        field.setType(DataType.BYTEARRAY);
    }
    return s;
}
Example 12
Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0 | 5 votes |
/** schema: (value, value, value) where keys are in the front. */ public ResourceSchema getSchema(String location, Job job) throws IOException { setLocation(location, job); CfInfo cfInfo = getCfInfo(loadSignature); CfDef cfDef = cfInfo.cfDef; // top-level schema, no type ResourceSchema schema = new ResourceSchema(); // get default marshallers and validators Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef); Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef); // will contain all fields for this schema List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>(); for (ColumnDef cdef : cfDef.column_metadata) { ResourceFieldSchema valSchema = new ResourceFieldSchema(); AbstractType validator = validators.get(cdef.name); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); valSchema.setName(new String(cdef.getName())); valSchema.setType(getPigType(validator)); allSchemaFields.add(valSchema); } // top level schema contains everything schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()])); return schema; }
Example 13
Source File: AvroStorageUtils.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Wraps a Pig field schema in a tuple that contains only that field.
 *
 * @param subFieldSchema the field schema to nest
 * @return a TUPLE field schema named {@code PIG_TUPLE_WRAPPER} with
 *         {@code subFieldSchema} as its only inner field
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceSchema nested = new ResourceSchema();
    nested.setFields(new ResourceFieldSchema[] {subFieldSchema});

    ResourceFieldSchema tupleFs = new ResourceFieldSchema();
    tupleFs.setType(DataType.TUPLE);
    tupleFs.setName(PIG_TUPLE_WRAPPER);
    tupleFs.setSchema(nested);

    return tupleFs;
}
Example 14
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Converts a type into a Pig field schema.
 *
 * <p>The Pig type comes from {@code convertType}. Non-primitive types also get
 * a nested schema from {@code convertComplex}.
 *
 * @param type the type to convert
 * @return the resulting field schema (no name is set here)
 * @throws IOException propagated from the conversion helpers
 */
private static ResourceFieldSchema convert(Type type) throws IOException {
    ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
    fieldSchema.setType(convertType(type));

    if (type.isPrimitiveType()) {
        return fieldSchema;
    }

    fieldSchema.setSchema(convertComplex(type));
    return fieldSchema;
}
Example 15
Source File: PigTuple.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Replaces {@code schemaField} with a fresh TUPLE field schema wrapping the
 * given schema.
 *
 * <p>The field is assigned before the schema is attached, so it already refers
 * to the new object if attaching fails.
 *
 * @param schema the schema to wrap
 * @throws EsHadoopIllegalStateException if the schema cannot be attached
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    schemaField = wrapper;
    wrapper.setType(DataType.TUPLE);

    try {
        wrapper.setSchema(schema);
    } catch (IOException ex) {
        String message = String.format("Cannot use schema [%s]", schema);
        throw new EsHadoopIllegalStateException(message, ex);
    }
}
Example 16
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 4 votes |
/** * Convert a schema with field name to a pig schema */ private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords) throws IOException { AvroStorageLog.details("InConvert avro schema with field name " + fieldName); Schema.Type avroType = in.getType(); ResourceFieldSchema fieldSchema = new ResourceFieldSchema(); fieldSchema.setName(fieldName); if (avroType.equals(Schema.Type.RECORD)) { AvroStorageLog.details("convert to a pig tuple"); if (visitedRecords.contains(in)) { fieldSchema.setType(DataType.BYTEARRAY); } else { visitedRecords.add(in); fieldSchema.setType(DataType.TUPLE); ResourceSchema tupleSchema = new ResourceSchema(); List<Schema.Field> fields = in.getFields(); ResourceFieldSchema[] childFields = new ResourceFieldSchema[fields.size()]; int index = 0; for (Schema.Field field : fields) { childFields[index++] = inconvert(field.schema(), field.name(), visitedRecords); } tupleSchema.setFields(childFields); fieldSchema.setSchema(tupleSchema); visitedRecords.remove(in); } } else if (avroType.equals(Schema.Type.ARRAY)) { AvroStorageLog.details("convert array to a pig bag"); fieldSchema.setType(DataType.BAG); Schema elemSchema = in.getElementType(); ResourceFieldSchema subFieldSchema = inconvert(elemSchema, ARRAY_FIELD, visitedRecords); add2BagSchema(fieldSchema, subFieldSchema); } else if (avroType.equals(Schema.Type.MAP)) { AvroStorageLog.details("convert map to a pig map"); fieldSchema.setType(DataType.MAP); } else if (avroType.equals(Schema.Type.UNION)) { if (AvroStorageUtils.isAcceptableUnion(in)) { Schema acceptSchema = AvroStorageUtils.getAcceptedType(in); ResourceFieldSchema realFieldSchema = inconvert(acceptSchema, null, visitedRecords); fieldSchema.setType(realFieldSchema.getType()); fieldSchema.setSchema(realFieldSchema.getSchema()); } else throw new IOException("Do not support generic union:" + in); } else if (avroType.equals(Schema.Type.FIXED)) { fieldSchema.setType(DataType.BYTEARRAY); } else if 
(avroType.equals(Schema.Type.BOOLEAN)) { fieldSchema.setType(DataType.BOOLEAN); } else if (avroType.equals(Schema.Type.BYTES)) { fieldSchema.setType(DataType.BYTEARRAY); } else if (avroType.equals(Schema.Type.DOUBLE)) { fieldSchema.setType(DataType.DOUBLE); } else if (avroType.equals(Schema.Type.ENUM)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.FLOAT)) { fieldSchema.setType(DataType.FLOAT); } else if (avroType.equals(Schema.Type.INT)) { fieldSchema.setType(DataType.INTEGER); } else if (avroType.equals(Schema.Type.LONG)) { fieldSchema.setType(DataType.LONG); } else if (avroType.equals(Schema.Type.STRING)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.NULL)) { // value of NULL is always NULL fieldSchema.setType(DataType.INTEGER); } else { throw new IOException("Unsupported avro type:" + avroType); } return fieldSchema; }
Example 17
Source File: TestStore.java From spork with Apache License 2.0 | 4 votes |
/**
 * Stores a randomly generated tuple that contains nulls, reads the output file
 * back line by line, rebuilds a tuple from the tab-separated columns and checks
 * it equals the input.
 *
 * <p>An empty column is read back as {@code null}. Any other column is decoded
 * with the {@link PigStorage} load caster method for its position.
 *
 * @throws Exception if storing, reading or decoding fails
 */
@Test
public void testStoreComplexDataWithNull() throws Exception {
    Tuple inputTuple = GenRandomData.genRandSmallBagTextTupleWithNulls(new Random(), 10, 100);
    inpDB = DefaultBagFactory.getInstance().newDefaultBag();
    inpDB.add(inputTuple);
    storeAndCopyLocally(inpDB);
    PigStorage ps = new PigStorage("\t");

    // The schemas handed to the load caster are the same for every line, so
    // they are built once here instead of once per line.
    ResourceFieldSchema stringfs = new ResourceFieldSchema();
    stringfs.setType(DataType.CHARARRAY);
    ResourceFieldSchema intfs = new ResourceFieldSchema();
    intfs.setType(DataType.INTEGER);
    ResourceFieldSchema bytefs = new ResourceFieldSchema();
    bytefs.setType(DataType.BYTEARRAY);

    ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
    ResourceFieldSchema tuplefs = new ResourceFieldSchema();
    tuplefs.setSchema(tupleSchema);
    tuplefs.setType(DataType.TUPLE);

    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
    ResourceFieldSchema bagfs = new ResourceFieldSchema();
    bagfs.setSchema(bagSchema);
    bagfs.setType(DataType.BAG);

    ResourceSchema mapSchema = new ResourceSchema();
    mapSchema.setFields(new ResourceFieldSchema[]{bytefs});
    ResourceFieldSchema mapfs = new ResourceFieldSchema();
    mapfs.setSchema(mapSchema);
    mapfs.setType(DataType.MAP);

    BufferedReader br = new BufferedReader(new FileReader(outputFileName));
    try {
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            System.err.println("Complex data: ");
            System.err.println(line);
            // limit -1 keeps trailing empty columns so null fields at the end survive
            String[] flds = line.split("\t", -1);
            Tuple t = new DefaultTuple();

            t.append(flds[0].compareTo("") != 0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("") != 0 ? new DataByteArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("") != 0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("") != 0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
            t.append(flds[4].compareTo("") != 0 ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
            t.append(flds[5].compareTo("") != 0 ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
            t.append(flds[6].compareTo("") != 0 ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
            t.append(flds[7].compareTo("") != 0 ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
            t.append(flds[8].compareTo("") != 0 ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
            t.append(flds[9].compareTo("") != 0 ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
            t.append(flds[10].compareTo("") != 0 ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
            // Column 11 is decoded from its own bytes; it was previously read from flds[10].
            t.append(flds[11].compareTo("") != 0 ? ps.getLoadCaster().bytesToCharArray(flds[11].getBytes()) : null);

            assertEquals(inputTuple, t);
        }
    } finally {
        // Close the reader even when an assertion or a parse call throws.
        br.close();
    }
}
Example 18
Source File: GenRandomData.java From spork with Apache License 2.0 | 4 votes |
/**
 * Builds the field schema of an eleven-field tuple covering the complex and
 * scalar Pig types.
 *
 * <p>Field order: bag, bytearray, chararray, double, float, int, long, map
 * (no inner schema), tuple, boolean, datetime.
 *
 * @return a TUPLE field schema carrying that inner schema
 * @throws IOException propagated from the schema construction calls
 */
public static ResourceFieldSchema getSmallBagTextTupleFieldSchema() throws IOException {
    ResourceFieldSchema bagFs = getSmallTupDataBagFieldSchema();

    ResourceFieldSchema bytearrayFs = new ResourceFieldSchema();
    bytearrayFs.setType(DataType.BYTEARRAY);

    ResourceFieldSchema chararrayFs = new ResourceFieldSchema();
    chararrayFs.setType(DataType.CHARARRAY);

    ResourceFieldSchema doubleFs = new ResourceFieldSchema();
    doubleFs.setType(DataType.DOUBLE);

    ResourceFieldSchema floatFs = new ResourceFieldSchema();
    floatFs.setType(DataType.FLOAT);

    ResourceFieldSchema integerFs = new ResourceFieldSchema();
    integerFs.setType(DataType.INTEGER);

    ResourceFieldSchema longFs = new ResourceFieldSchema();
    longFs.setType(DataType.LONG);

    ResourceFieldSchema mapFs = new ResourceFieldSchema();
    mapFs.setType(DataType.MAP);

    ResourceFieldSchema tupleFs = getSmallTupleFieldSchema();

    ResourceFieldSchema booleanFs = new ResourceFieldSchema();
    booleanFs.setType(DataType.BOOLEAN);

    ResourceFieldSchema datetimeFs = new ResourceFieldSchema();
    datetimeFs.setType(DataType.DATETIME);

    ResourceFieldSchema[] layout = {
        bagFs, bytearrayFs, chararrayFs, doubleFs, floatFs, integerFs,
        longFs, mapFs, tupleFs, booleanFs, datetimeFs
    };
    ResourceSchema inner = new ResourceSchema();
    inner.setFields(layout);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(inner);
    result.setType(DataType.TUPLE);
    return result;
}
Example 19
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
private static ResourceSchema convertComplex(Type type) throws IOException { ResourceSchema result = new ResourceSchema(); switch (type.typeId()) { case STRUCT: StructType structType = type.asStructType(); List<ResourceFieldSchema> fields = Lists.newArrayList(); for (Types.NestedField f : structType.fields()) { fields.add(convert(f)); } result.setFields(fields.toArray(new ResourceFieldSchema[0])); return result; case LIST: ListType listType = type.asListType(); ResourceFieldSchema [] elementFieldSchemas = new ResourceFieldSchema[]{convert(listType.elementType())}; if (listType.elementType().isStructType()) { result.setFields(elementFieldSchemas); } else { //Wrap non-struct types in tuples ResourceSchema elementSchema = new ResourceSchema(); elementSchema.setFields(elementFieldSchemas); ResourceFieldSchema tupleSchema = new ResourceFieldSchema(); tupleSchema.setType(DataType.TUPLE); tupleSchema.setSchema(elementSchema); result.setFields(new ResourceFieldSchema[]{tupleSchema}); } return result; case MAP: MapType mapType = type.asMapType(); if (mapType.keyType().typeId() != Type.TypeID.STRING) { throw new FrontendException("Unsupported map key type: " + mapType.keyType()); } result.setFields(new ResourceFieldSchema[]{convert(mapType.valueType())}); return result; default: throw new FrontendException("Unsupported complex type: " + type); } }
Example 20
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
private static ResourceSchema convertComplex(Type type) throws IOException { ResourceSchema result = new ResourceSchema(); switch (type.typeId()) { case STRUCT: Types.StructType structType = type.asStructType(); List<ResourceFieldSchema> fields = Lists.newArrayList(); for (Types.NestedField f : structType.fields()) { fields.add(convert(f)); } result.setFields(fields.toArray(new ResourceFieldSchema[0])); return result; case LIST: Types.ListType listType = type.asListType(); ResourceFieldSchema [] elementFieldSchemas = new ResourceFieldSchema[]{convert(listType.elementType())}; if (listType.elementType().isStructType()) { result.setFields(elementFieldSchemas); } else { //Wrap non-struct types in tuples ResourceSchema elementSchema = new ResourceSchema(); elementSchema.setFields(elementFieldSchemas); ResourceFieldSchema tupleSchema = new ResourceFieldSchema(); tupleSchema.setType(DataType.TUPLE); tupleSchema.setSchema(elementSchema); result.setFields(new ResourceFieldSchema[]{tupleSchema}); } return result; case MAP: Types.MapType mapType = type.asMapType(); if (mapType.keyType().typeId() != Type.TypeID.STRING) { throw new FrontendException("Unsupported map key type: " + mapType.keyType()); } result.setFields(new ResourceFieldSchema[]{convert(mapType.valueType())}); return result; default: throw new FrontendException("Unsupported complex type: " + type); } }