Java Code Examples for org.apache.pig.ResourceSchema#setFields()
The following examples show how to use
org.apache.pig.ResourceSchema#setFields().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 6 votes |
/** * Convert an Avro schema to a Pig schema */ public static ResourceSchema convert(Schema schema) throws IOException { if (AvroStorageUtils.containsGenericUnion(schema)) throw new IOException ("We don't accept schema containing generic unions."); Set<Schema> visitedRecords = new HashSet<Schema>(); ResourceFieldSchema inSchema = inconvert(schema, FIELD, visitedRecords); ResourceSchema tupleSchema; if (inSchema.getType() == DataType.TUPLE) { tupleSchema = inSchema.getSchema(); } else { // other typs ResourceFieldSchema tupleWrapper = AvroStorageUtils.wrapAsTuple(inSchema); ResourceSchema topSchema = new ResourceSchema(); topSchema.setFields(new ResourceFieldSchema[] { tupleWrapper }); tupleSchema = topSchema; } return tupleSchema; }
Example 2
Source File: GenRandomData.java From spork with Apache License 2.0 | 6 votes |
/**
 * Builds the field schema for a bag of tuples, each tuple holding
 * {@code column} integer fields.
 *
 * NOTE(review): despite "Float" in the name, every tuple field is typed
 * INTEGER — confirm this is intentional.
 *
 * @param column number of integer fields in each inner tuple
 * @return a BAG-typed field schema describing {(int, int, ...)}
 */
public static ResourceFieldSchema getFloatDataBagFieldSchema(int column) throws IOException {
    ResourceFieldSchema intField = new ResourceFieldSchema();
    intField.setType(DataType.INTEGER);

    // The same field-schema instance is shared across every tuple slot.
    ResourceFieldSchema[] tupleFields = new ResourceFieldSchema[column];
    for (int idx = 0; idx < column; idx++) {
        tupleFields[idx] = intField;
    }
    ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(tupleFields);

    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(tupleSchema);

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] { tupleField });

    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setType(DataType.BAG);
    bagField.setSchema(bagContents);
    return bagField;
}
Example 3
Source File: AegisthusLoader.java From aegisthus with Apache License 2.0 | 6 votes |
/**
 * Describes one column as a single-field schema named "column", whose
 * tuple layout is (name, value, ts, status, ttl).
 *
 * @return schema with one TUPLE-typed field wrapping the five column parts
 */
protected ResourceSchema columnSchema() throws IOException {
    // The five parts of a column, in output order.
    ResourceFieldSchema[] columnParts = {
            field("name", DataType.BYTEARRAY),
            field("value", DataType.BYTEARRAY),
            field("ts", DataType.LONG),
            field("status", DataType.CHARARRAY),
            field("ttl", DataType.LONG),
    };
    ResourceSchema tuple = new ResourceSchema();
    tuple.setFields(columnParts);

    ResourceFieldSchema columnField = new ResourceFieldSchema();
    columnField.setName("column");
    columnField.setType(DataType.TUPLE);
    columnField.setSchema(tuple);

    ResourceSchema schema = new ResourceSchema();
    schema.setFields(new ResourceFieldSchema[] { columnField });
    return schema;
}
Example 4
Source File: PhoenixPigSchemaUtil.java From phoenix with Apache License 2.0 | 5 votes |
/**
 * Derives the Pig {@link ResourceSchema} for a Phoenix load: in QUERY mode
 * the columns are inferred from the configured SELECT statement, otherwise
 * they come from the configured table's column metadata.
 *
 * @param configuration Hadoop configuration carrying the Phoenix settings
 * @return schema with one field per resolved column
 * @throws IOException wrapping any SQLException raised while resolving columns
 */
public static ResourceSchema getResourceSchema(final Configuration configuration) throws IOException {
    final ResourceSchema schema = new ResourceSchema();
    try {
        final SchemaType schemaType = PhoenixConfigurationUtil.getSchemaType(configuration);
        final List<ColumnInfo> columns;
        if (SchemaType.QUERY.equals(schemaType)) {
            final String sqlQuery = PhoenixConfigurationUtil.getSelectStatement(configuration);
            Preconditions.checkNotNull(sqlQuery, "No Sql Query exists within the configuration");
            columns = new SqlQueryToColumnInfoFunction(configuration).apply(sqlQuery);
        } else {
            columns = PhoenixConfigurationUtil.getSelectColumnMetadataList(configuration);
        }
        final ResourceFieldSchema[] fields = new ResourceFieldSchema[columns.size()];
        int index = 0;
        for (final ColumnInfo cinfo : columns) {
            // Map SQL type id -> Phoenix type -> Pig type byte.
            final PDataType phoenixDataType = PDataType.fromTypeId(cinfo.getSqlType());
            final byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType);
            final ResourceFieldSchema field = new ResourceFieldSchema();
            field.setType(pigType).setName(cinfo.getDisplayName());
            fields[index++] = field;
        }
        schema.setFields(fields);
    } catch (SQLException sqle) {
        LOG.error(String.format("Error: SQLException [%s] ", sqle.getMessage()));
        throw new IOException(sqle);
    }
    return schema;
}
Example 5
Source File: AvroSchema2Pig.java From Cubert with Apache License 2.0 | 5 votes |
/** * Convert an Avro schema to a Pig schema */ public static ResourceSchema convert(Schema schema) throws IOException { if (AvroStorageUtils.containsGenericUnion(schema)) throw new IOException("We don't accept schema containing generic unions."); Set<Schema> visitedRecords = new HashSet<Schema>(); ResourceFieldSchema inSchema = inconvert(schema, FIELD, visitedRecords); ResourceSchema tupleSchema; if (inSchema.getType() == DataType.TUPLE) { tupleSchema = inSchema.getSchema(); } else { // other typs ResourceFieldSchema tupleWrapper = AvroStorageUtils.wrapAsTuple(inSchema); ResourceSchema topSchema = new ResourceSchema(); topSchema.setFields(new ResourceFieldSchema[] { tupleWrapper }); tupleSchema = topSchema; } return tupleSchema; }
Example 6
Source File: AegisthusLoader.java From aegisthus with Apache License 2.0 | 5 votes |
/**
 * Declares the loader's row layout: (key, deletedat, map_columns, bag_columns).
 * The column data is exposed twice — once as a map and once as a bag of tuples —
 * both described by {@link #columnSchema()}.
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    ResourceFieldSchema[] rowFields = {
            field("key", DataType.BYTEARRAY),
            field("deletedat", DataType.LONG),
            subfield("map_columns", DataType.MAP, columnSchema()),
            subfield("bag_columns", DataType.BAG, columnSchema()),
    };
    ResourceSchema resourceSchema = new ResourceSchema();
    resourceSchema.setFields(rowFields);
    return resourceSchema;
}
Example 7
Source File: AvroStorageUtils.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Wraps a single field schema in a TUPLE-typed field named
 * {@code PIG_TUPLE_WRAPPER}, whose inner schema has exactly that one field.
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceSchema inner = new ResourceSchema();
    inner.setFields(new ResourceFieldSchema[] { subFieldSchema });

    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    wrapper.setName(PIG_TUPLE_WRAPPER);
    wrapper.setType(DataType.TUPLE);
    wrapper.setSchema(inner);
    return wrapper;
}
Example 8
Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0 | 5 votes |
/** schema: (value, value, value) where keys are in the front. */ public ResourceSchema getSchema(String location, Job job) throws IOException { setLocation(location, job); CfInfo cfInfo = getCfInfo(loadSignature); CfDef cfDef = cfInfo.cfDef; // top-level schema, no type ResourceSchema schema = new ResourceSchema(); // get default marshallers and validators Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef); Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef); // will contain all fields for this schema List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>(); for (ColumnDef cdef : cfDef.column_metadata) { ResourceFieldSchema valSchema = new ResourceFieldSchema(); AbstractType validator = validators.get(cdef.name); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); valSchema.setName(new String(cdef.getName())); valSchema.setType(getPigType(validator)); allSchemaFields.add(valSchema); } // top level schema contains everything schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()])); return schema; }
Example 9
Source File: Utils.java From spork with Apache License 2.0 | 5 votes |
/**
 * This method adds a FieldSchema for the 'input source tag/path' as the first
 * field of the given schema. Called only when PigStorage is invoked with
 * '-tagFile' or '-tagPath' and the schema file is present to be loaded.
 *
 * @param schema schema whose existing fields are shifted right by one
 * @param fieldName name of the new first (CHARARRAY) tag field
 * @return the same schema instance with the tag field prepended
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] fieldSchemas = schema.getFields();
    ResourceFieldSchema[] fieldSchemasWithSourceTag =
            new ResourceFieldSchema[fieldSchemas.length + 1];
    fieldSchemasWithSourceTag[0] =
            new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
    // Bulk copy instead of the previous element-by-element loop.
    System.arraycopy(fieldSchemas, 0, fieldSchemasWithSourceTag, 1, fieldSchemas.length);
    return schema.setFields(fieldSchemasWithSourceTag);
}
Example 10
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds the field schema for a two-field tuple laid out as (int, chararray).
 *
 * @return a TUPLE-typed field schema describing (int, chararray)
 */
ResourceFieldSchema getTupleFieldSchema() throws IOException {
    ResourceFieldSchema charField = new ResourceFieldSchema();
    charField.setType(DataType.CHARARRAY);
    ResourceFieldSchema intField = new ResourceFieldSchema();
    intField.setType(DataType.INTEGER);

    // Field order inside the tuple: integer first, then chararray.
    ResourceSchema inner = new ResourceSchema();
    inner.setFields(new ResourceFieldSchema[] { intField, charField });

    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(inner);
    return tupleField;
}
Example 11
Source File: AvroStorageUtils.java From spork with Apache License 2.0 | 5 votes |
/**
 * Wraps the given field schema as the only field of a tuple. The wrapper
 * field is typed TUPLE and named {@code PIG_TUPLE_WRAPPER}.
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceFieldSchema tupleWrapper = new ResourceFieldSchema();
    tupleWrapper.setType(DataType.TUPLE);
    tupleWrapper.setName(PIG_TUPLE_WRAPPER);

    ResourceSchema contents = new ResourceSchema();
    contents.setFields(new ResourceFieldSchema[] { subFieldSchema });
    tupleWrapper.setSchema(contents);
    return tupleWrapper;
}
Example 12
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 5 votes |
/**
 * Adds a field schema to a bag schema: installs {@code subFieldSchema} as the
 * single element of {@code fieldSchema}'s inner schema, wrapping it in a tuple
 * first when it is not already one (bags contain tuples).
 */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                    ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceFieldSchema element;
    if (subFieldSchema.getType() == DataType.TUPLE) {
        element = subFieldSchema;
    } else {
        element = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] { element });
    fieldSchema.setSchema(bagContents);
}
Example 13
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG-typed field schema whose tuples use the "small tuple"
 * layout from {@link #getSmallTupleFieldSchema()}.
 */
public static ResourceFieldSchema getSmallTupDataBagFieldSchema() throws IOException {
    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] { getSmallTupleFieldSchema() });

    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setType(DataType.BAG);
    bagField.setSchema(bagContents);
    return bagField;
}
Example 14
Source File: TestStore.java From spork with Apache License 2.0 | 4 votes |
/**
 * Stores a tuple containing complex values (bag, map, tuple) and nulls via
 * PigStorage, reads the file back, casts each tab-separated column to its
 * type, and asserts the reconstructed tuple equals the original.
 *
 * Fixes: the BufferedReader previously leaked when an assertion failed
 * (it was closed only after the loop) — now try-with-resources.
 */
@Test
public void testStoreComplexDataWithNull() throws Exception {
    Tuple inputTuple = GenRandomData.genRandSmallBagTextTupleWithNulls(new Random(), 10, 100);
    inpDB = DefaultBagFactory.getInstance().newDefaultBag();
    inpDB.add(inputTuple);
    storeAndCopyLocally(inpDB);

    // Schemas used to cast the complex columns back from bytes.
    ResourceFieldSchema tuplefs = complexTupleFieldSchema();
    ResourceFieldSchema bagfs = bagFieldSchema(tuplefs);
    ResourceFieldSchema mapfs = mapFieldSchema();

    PigStorage ps = new PigStorage("\t");
    try (BufferedReader br = new BufferedReader(new FileReader(outputFileName))) {
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            System.err.println("Complex data: ");
            System.err.println(line);
            // -1 keeps trailing empty fields so null columns stay addressable.
            String[] flds = line.split("\t", -1);
            Tuple t = new DefaultTuple();
            t.append(flds[0].compareTo("") != 0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("") != 0 ? new DataByteArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("") != 0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("") != 0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
            t.append(flds[4].compareTo("") != 0 ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
            t.append(flds[5].compareTo("") != 0 ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
            t.append(flds[6].compareTo("") != 0 ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
            t.append(flds[7].compareTo("") != 0 ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
            t.append(flds[8].compareTo("") != 0 ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
            t.append(flds[9].compareTo("") != 0 ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
            t.append(flds[10].compareTo("") != 0 ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
            // NOTE(review): the null-check inspects flds[11] but the value is
            // parsed from flds[10] — possibly an intentional chararray copy of
            // the datetime column, possibly an index typo. Verify against
            // genRandSmallBagTextTupleWithNulls before changing.
            t.append(flds[11].compareTo("") != 0 ? ps.getLoadCaster().bytesToCharArray(flds[10].getBytes()) : null);
            assertEquals(inputTuple, t);
        }
    }
}

/** (chararray, int) tuple schema used for the tuple-typed column. */
private static ResourceFieldSchema complexTupleFieldSchema() throws IOException {
    ResourceFieldSchema stringfs = new ResourceFieldSchema();
    stringfs.setType(DataType.CHARARRAY);
    ResourceFieldSchema intfs = new ResourceFieldSchema();
    intfs.setType(DataType.INTEGER);
    ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(new ResourceFieldSchema[] { stringfs, intfs });
    ResourceFieldSchema tuplefs = new ResourceFieldSchema();
    tuplefs.setSchema(tupleSchema);
    tuplefs.setType(DataType.TUPLE);
    return tuplefs;
}

/** Bag schema whose single element is the given tuple field. */
private static ResourceFieldSchema bagFieldSchema(ResourceFieldSchema tuplefs) throws IOException {
    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(new ResourceFieldSchema[] { tuplefs });
    ResourceFieldSchema bagfs = new ResourceFieldSchema();
    bagfs.setSchema(bagSchema);
    bagfs.setType(DataType.BAG);
    return bagfs;
}

/** Map schema whose value field is a bytearray. */
private static ResourceFieldSchema mapFieldSchema() throws IOException {
    ResourceFieldSchema bytefs = new ResourceFieldSchema();
    bytefs.setType(DataType.BYTEARRAY);
    ResourceSchema mapSchema = new ResourceSchema();
    mapSchema.setFields(new ResourceFieldSchema[] { bytefs });
    ResourceFieldSchema mapfs = new ResourceFieldSchema();
    mapfs.setSchema(mapSchema);
    mapfs.setType(DataType.MAP);
    return mapfs;
}
Example 15
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
/** Maps an Iceberg {@link Schema}'s columns to a Pig {@link ResourceSchema}. */
public static ResourceSchema convert(Schema icebergSchema) throws IOException {
    ResourceSchema resourceSchema = new ResourceSchema();
    resourceSchema.setFields(convertFields(icebergSchema.columns()));
    return resourceSchema;
}
Example 16
Source File: AllLoader.java From spork with Apache License 2.0 | 4 votes |
@Override public ResourceSchema getSchema(String location, Job job) throws IOException { if (schema == null) { ResourceSchema foundSchema = jsonMetadata.getSchema(location, job); // determine schema from files in location if (foundSchema == null) { foundSchema = getSchemaFromLoadFunc(location, job); } // only add the partition keys if the schema is not null // we use the partitionKeySet to only set partition keys once. if (!(partitionKeysSet || foundSchema == null)) { String[] keys = getPartitionColumns(location, job); if (!(keys == null || keys.length == 0)) { // re-edit the pigSchema to contain the new partition keys. ResourceFieldSchema[] fields = foundSchema.getFields(); LOG.debug("Schema: " + Arrays.toString(fields)); ResourceFieldSchema[] newFields = Arrays.copyOf(fields, fields.length + keys.length); int index = fields.length; for (String key : keys) { newFields[index++] = new ResourceFieldSchema( new FieldSchema(key, DataType.CHARARRAY)); } foundSchema.setFields(newFields); LOG.debug("Added partition fields: " + keys + " to loader schema"); LOG.debug("Schema is: " + Arrays.toString(newFields)); } partitionKeysSet = true; } schema = foundSchema; } return schema; }
Example 17
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 4 votes |
/** * Convert a schema with field name to a pig schema */ private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords) throws IOException { AvroStorageLog.details("InConvert avro schema with field name " + fieldName); Schema.Type avroType = in.getType(); ResourceFieldSchema fieldSchema = new ResourceFieldSchema(); fieldSchema.setName(fieldName); if (avroType.equals(Schema.Type.RECORD)) { AvroStorageLog.details("convert to a pig tuple"); if (visitedRecords.contains(in)) { fieldSchema.setType(DataType.BYTEARRAY); } else { visitedRecords.add(in); fieldSchema.setType(DataType.TUPLE); ResourceSchema tupleSchema = new ResourceSchema(); List<Schema.Field> fields = in.getFields(); ResourceFieldSchema[] childFields = new ResourceFieldSchema[fields.size()]; int index = 0; for (Schema.Field field : fields) { childFields[index++] = inconvert(field.schema(), field.name(), visitedRecords); } tupleSchema.setFields(childFields); fieldSchema.setSchema(tupleSchema); visitedRecords.remove(in); } } else if (avroType.equals(Schema.Type.ARRAY)) { AvroStorageLog.details("convert array to a pig bag"); fieldSchema.setType(DataType.BAG); Schema elemSchema = in.getElementType(); ResourceFieldSchema subFieldSchema = inconvert(elemSchema, ARRAY_FIELD, visitedRecords); add2BagSchema(fieldSchema, subFieldSchema); } else if (avroType.equals(Schema.Type.MAP)) { AvroStorageLog.details("convert map to a pig map"); fieldSchema.setType(DataType.MAP); } else if (avroType.equals(Schema.Type.UNION)) { if (AvroStorageUtils.isAcceptableUnion(in)) { Schema acceptSchema = AvroStorageUtils.getAcceptedType(in); ResourceFieldSchema realFieldSchema = inconvert(acceptSchema, null, visitedRecords); fieldSchema.setType(realFieldSchema.getType()); fieldSchema.setSchema(realFieldSchema.getSchema()); } else throw new IOException("Do not support generic union:" + in); } else if (avroType.equals(Schema.Type.FIXED)) { fieldSchema.setType(DataType.BYTEARRAY); } else if 
(avroType.equals(Schema.Type.BOOLEAN)) { fieldSchema.setType(DataType.BOOLEAN); } else if (avroType.equals(Schema.Type.BYTES)) { fieldSchema.setType(DataType.BYTEARRAY); } else if (avroType.equals(Schema.Type.DOUBLE)) { fieldSchema.setType(DataType.DOUBLE); } else if (avroType.equals(Schema.Type.ENUM)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.FLOAT)) { fieldSchema.setType(DataType.FLOAT); } else if (avroType.equals(Schema.Type.INT)) { fieldSchema.setType(DataType.INTEGER); } else if (avroType.equals(Schema.Type.LONG)) { fieldSchema.setType(DataType.LONG); } else if (avroType.equals(Schema.Type.STRING)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.NULL)) { // value of NULL is always NULL fieldSchema.setType(DataType.INTEGER); } else { throw new IOException("Unsupported avro type:" + avroType); } return fieldSchema; }
Example 18
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
private static ResourceSchema convertComplex(Type type) throws IOException { ResourceSchema result = new ResourceSchema(); switch (type.typeId()) { case STRUCT: StructType structType = type.asStructType(); List<ResourceFieldSchema> fields = Lists.newArrayList(); for (Types.NestedField f : structType.fields()) { fields.add(convert(f)); } result.setFields(fields.toArray(new ResourceFieldSchema[0])); return result; case LIST: ListType listType = type.asListType(); ResourceFieldSchema [] elementFieldSchemas = new ResourceFieldSchema[]{convert(listType.elementType())}; if (listType.elementType().isStructType()) { result.setFields(elementFieldSchemas); } else { //Wrap non-struct types in tuples ResourceSchema elementSchema = new ResourceSchema(); elementSchema.setFields(elementFieldSchemas); ResourceFieldSchema tupleSchema = new ResourceFieldSchema(); tupleSchema.setType(DataType.TUPLE); tupleSchema.setSchema(elementSchema); result.setFields(new ResourceFieldSchema[]{tupleSchema}); } return result; case MAP: MapType mapType = type.asMapType(); if (mapType.keyType().typeId() != Type.TypeID.STRING) { throw new FrontendException("Unsupported map key type: " + mapType.keyType()); } result.setFields(new ResourceFieldSchema[]{convert(mapType.valueType())}); return result; default: throw new FrontendException("Unsupported complex type: " + type); } }
Example 19
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
/** Translates the top-level Iceberg schema columns into a Pig ResourceSchema. */
public static ResourceSchema convert(Schema icebergSchema) throws IOException {
    final ResourceSchema pigSchema = new ResourceSchema();
    pigSchema.setFields(convertFields(icebergSchema.columns()));
    return pigSchema;
}
Example 20
Source File: SchemaUtil.java From iceberg with Apache License 2.0 | 4 votes |
private static ResourceSchema convertComplex(Type type) throws IOException { ResourceSchema result = new ResourceSchema(); switch (type.typeId()) { case STRUCT: Types.StructType structType = type.asStructType(); List<ResourceFieldSchema> fields = Lists.newArrayList(); for (Types.NestedField f : structType.fields()) { fields.add(convert(f)); } result.setFields(fields.toArray(new ResourceFieldSchema[0])); return result; case LIST: Types.ListType listType = type.asListType(); ResourceFieldSchema [] elementFieldSchemas = new ResourceFieldSchema[]{convert(listType.elementType())}; if (listType.elementType().isStructType()) { result.setFields(elementFieldSchemas); } else { //Wrap non-struct types in tuples ResourceSchema elementSchema = new ResourceSchema(); elementSchema.setFields(elementFieldSchemas); ResourceFieldSchema tupleSchema = new ResourceFieldSchema(); tupleSchema.setType(DataType.TUPLE); tupleSchema.setSchema(elementSchema); result.setFields(new ResourceFieldSchema[]{tupleSchema}); } return result; case MAP: Types.MapType mapType = type.asMapType(); if (mapType.keyType().typeId() != Type.TypeID.STRING) { throw new FrontendException("Unsupported map key type: " + mapType.keyType()); } result.setFields(new ResourceFieldSchema[]{convert(mapType.valueType())}); return result; default: throw new FrontendException("Unsupported complex type: " + type); } }