Java Code Examples for org.apache.pig.ResourceSchema.ResourceFieldSchema#setType()

The following examples show how to use org.apache.pig.ResourceSchema.ResourceFieldSchema#setType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GenRandomData.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a BAG field schema whose only field is a TUPLE of {@code column} fields.
 *
 * @param column number of fields placed in the inner tuple schema
 * @return the BAG-typed field schema wrapping the TUPLE-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getFloatDataBagFieldSchema(int column) throws IOException {
    // NOTE(review): the method name says "Float" but the element type set here is INTEGER.
    ResourceFieldSchema elementField = new ResourceFieldSchema();
    elementField.setType(DataType.INTEGER);

    // Every slot of the tuple refers to the same element field instance.
    ResourceFieldSchema[] tupleFields = new ResourceFieldSchema[column];
    java.util.Arrays.fill(tupleFields, elementField);

    ResourceSchema innerTupleSchema = new ResourceSchema();
    innerTupleSchema.setFields(tupleFields);

    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(innerTupleSchema);
    tupleField.setType(DataType.TUPLE);

    ResourceSchema outerBagSchema = new ResourceSchema();
    outerBagSchema.setFields(new ResourceFieldSchema[] { tupleField });

    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setSchema(outerBagSchema);
    bagField.setType(DataType.BAG);
    return bagField;
}
 
Example 2
Source File: GenRandomData.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a TUPLE field schema with eleven fields of mixed primitive types, in the order
 * chararray, long, int, double, float, chararray, int, double, float, boolean, datetime.
 *
 * @return the TUPLE-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getMixedTupleToConvertFieldSchema() throws IOException {
    ResourceFieldSchema charArrayField = new ResourceFieldSchema();
    charArrayField.setType(DataType.CHARARRAY);

    ResourceFieldSchema longField = new ResourceFieldSchema();
    longField.setType(DataType.LONG);

    ResourceFieldSchema integerField = new ResourceFieldSchema();
    integerField.setType(DataType.INTEGER);

    ResourceFieldSchema doubleField = new ResourceFieldSchema();
    doubleField.setType(DataType.DOUBLE);

    ResourceFieldSchema floatField = new ResourceFieldSchema();
    floatField.setType(DataType.FLOAT);

    ResourceFieldSchema booleanField = new ResourceFieldSchema();
    booleanField.setType(DataType.BOOLEAN);

    ResourceFieldSchema dateTimeField = new ResourceFieldSchema();
    dateTimeField.setType(DataType.DATETIME);

    // Some field instances are deliberately reused at more than one position.
    ResourceFieldSchema[] layout = new ResourceFieldSchema[] {
        charArrayField, longField, integerField, doubleField, floatField,
        charArrayField, integerField, doubleField, floatField,
        booleanField, dateTimeField
    };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(layout);

    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(innerSchema);
    tupleField.setType(DataType.TUPLE);
    return tupleField;
}
 
Example 3
Source File: AegisthusLoader.java    From aegisthus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a schema with one TUPLE field named "column" whose inner schema has the fields
 * name, value, ts, status and ttl.
 *
 * @return the schema containing the single "column" tuple field
 * @throws IOException if assembling the nested schema fails
 */
protected ResourceSchema columnSchema() throws IOException {
	ResourceFieldSchema[] columnParts = new ResourceFieldSchema[] {
		field("name", DataType.BYTEARRAY),
		field("value", DataType.BYTEARRAY),
		field("ts", DataType.LONG),
		field("status", DataType.CHARARRAY),
		field("ttl", DataType.LONG)
	};

	ResourceSchema innerSchema = new ResourceSchema();
	innerSchema.setFields(columnParts);

	ResourceFieldSchema columnField = new ResourceFieldSchema();
	columnField.setName("column");
	columnField.setType(DataType.TUPLE);
	columnField.setSchema(innerSchema);

	ResourceSchema outerSchema = new ResourceSchema();
	outerSchema.setFields(new ResourceFieldSchema[] { columnField });
	return outerSchema;
}
 
Example 4
Source File: AvroStorageUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a pig field schema in a tuple field schema named {@code PIG_TUPLE_WRAPPER}.
 *
 * @param subFieldSchema the field schema that becomes the only field of the tuple
 * @return the TUPLE-typed wrapper field schema
 * @throws IOException if assembling the wrapper schema fails
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceFieldSchema[] onlyChild = new ResourceFieldSchema[] { subFieldSchema };
    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(onlyChild);

    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    wrapper.setName(PIG_TUPLE_WRAPPER);
    wrapper.setType(DataType.TUPLE);
    wrapper.setSchema(innerSchema);
    return wrapper;
}
 
Example 5
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the tuple field schema from {@code getSmallBagTextTupleFieldSchema()} in a BAG
 * field schema.
 *
 * @return the BAG-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException{
    ResourceFieldSchema[] bagContents = new ResourceFieldSchema[] { getSmallBagTextTupleFieldSchema() };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(bagContents);

    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setSchema(innerSchema);
    bagField.setType(DataType.BAG);
    return bagField;
}
 
Example 6
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the tuple field schema from {@code getSmallTupleFieldSchema()} in a BAG field schema.
 *
 * @return the BAG-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getSmallTupDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagContents = new ResourceFieldSchema[] { getSmallTupleFieldSchema() };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(bagContents);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(innerSchema);
    result.setType(DataType.BAG);
    return result;
}
 
Example 7
Source File: Loader.java    From logparser with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the schema from {@code requestedFields}: one field per requested name, typed from
 * the casts reported by the record reader.
 *
 * <p>Names ending in ".*" become MAP. Otherwise LONG is preferred over DOUBLE, DOUBLE over
 * CHARARRAY, and a field with no cast information becomes BYTEARRAY.
 *
 * @param location not used by this method
 * @param job not used by this method
 * @return the schema listing every requested field in iteration order
 * @throws IOException if the cast information cannot be obtained
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    List<ResourceFieldSchema> columns = new ArrayList<>();

    for (String requested : requestedFields) {
        ResourceFieldSchema column = new ResourceFieldSchema();
        column.setName(requested);
        column.setDescription(requested);

        if (requested.endsWith(".*")) {
            column.setType(DataType.MAP);
            columns.add(column);
            continue;
        }

        EnumSet<Casts> supported = theInputFormat.getRecordReader().getCasts(requested);
        if (supported == null) {
            column.setType(DataType.BYTEARRAY);
        } else if (supported.contains(Casts.LONG)) {
            column.setType(DataType.LONG);
        } else if (supported.contains(Casts.DOUBLE)) {
            column.setType(DataType.DOUBLE);
        } else {
            column.setType(DataType.CHARARRAY);
        }
        columns.add(column);
    }

    ResourceSchema schema = new ResourceSchema();
    schema.setFields(columns.toArray(new ResourceFieldSchema[columns.size()]));
    return schema;
}
 
Example 8
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a MAP field schema whose inner schema holds a single BYTEARRAY field.
 *
 * @return the MAP-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getRandMapFieldSchema() throws IOException {
    ResourceFieldSchema valueField = new ResourceFieldSchema();
    valueField.setType(DataType.BYTEARRAY);

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(new ResourceFieldSchema[] { valueField });

    ResourceFieldSchema mapField = new ResourceFieldSchema();
    mapField.setSchema(innerSchema);
    mapField.setType(DataType.MAP);
    return mapField;
}
 
Example 9
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the tuple field schema from {@code getTupleFieldSchema()} in a BAG field schema.
 *
 * @return the BAG-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public ResourceFieldSchema getBagFieldSchema() throws IOException{
    ResourceFieldSchema[] bagContents = new ResourceFieldSchema[] { getTupleFieldSchema() };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(bagContents);

    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setSchema(innerSchema);
    bagField.setType(DataType.BAG);
    return bagField;
}
 
Example 10
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives {@code typeInfo} from the given schema (treated as a top-level tuple) and stores
 * its serialized form in this class's UDF properties under
 * {@code signature + SchemaSignatureSuffix}.
 *
 * @param rs the schema to wrap in a TUPLE field and convert
 * @throws IOException if wrapping or serializing the schema fails
 */
@Override
public void checkSchema(ResourceSchema rs) throws IOException {
    ResourceFieldSchema rootTuple = new ResourceFieldSchema();
    rootTuple.setType(DataType.TUPLE);
    rootTuple.setSchema(rs);
    typeInfo = OrcUtils.getTypeInfo(rootTuple);

    // The properties lookup stays ahead of serialization, matching the original call order.
    Properties udfProperties = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    String propertyKey = signature + SchemaSignatureSuffix;
    udfProperties.setProperty(propertyKey, ObjectSerializer.serialize(typeInfo));
}
 
Example 11
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Retypes every top-level field of type NULL to BYTEARRAY, modifying the schema in place.
 *
 * @param s the schema whose fields are inspected and possibly retyped
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s){
  for (ResourceFieldSchema field : s.getFields()) {
    if (field.getType() != DataType.NULL) {
      continue;
    }
    field.setType(DataType.BYTEARRAY);
  }
  return s;
}
 
Example 12
Source File: CqlNativeStorage.java    From stratio-cassandra with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the schema for the column family identified by {@code loadSignature}:
 * (value, value, value) where keys are in the front.
 *
 * <p>One field is emitted per entry in {@code cfDef.column_metadata}. Its pig type comes from
 * the column's validator, falling back to the default validator when the column has none.
 *
 * @param location passed to {@code setLocation} before the column family is looked up
 * @param job passed to {@code setLocation} before the column family is looked up
 * @return the top-level (untyped) schema containing one field per column
 * @throws IOException if the schema cannot be built
 */
public ResourceSchema getSchema(String location, Job job) throws IOException
{
    setLocation(location, job);
    CfInfo cfInfo = getCfInfo(loadSignature);
    CfDef cfDef = cfInfo.cfDef;
    // top-level schema, no type
    ResourceSchema schema = new ResourceSchema();

    // get default marshallers and validators
    Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef);
    Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef);

    // will contain all fields for this schema
    List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();

    for (ColumnDef cdef : cfDef.column_metadata)
    {
        ResourceFieldSchema valSchema = new ResourceFieldSchema();
        AbstractType validator = validators.get(cdef.name);
        if (validator == null)
        {
            validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
        }
        // Decode the name bytes with an explicit charset; new String(byte[]) would use the
        // platform default and could yield different field names on different hosts.
        valSchema.setName(new String(cdef.getName(), java.nio.charset.StandardCharsets.UTF_8));
        valSchema.setType(getPigType(validator));
        allSchemaFields.add(valSchema);
    }

    // top level schema contains everything
    schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
    return schema;
}
 
Example 13
Source File: AvroStorageUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a pig field schema in a tuple field schema named {@code PIG_TUPLE_WRAPPER}.
 *
 * @param subFieldSchema the field schema that becomes the only field of the tuple
 * @return the TUPLE-typed wrapper field schema
 * @throws IOException if assembling the wrapper schema fails
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceSchema singleFieldSchema = new ResourceSchema();
    singleFieldSchema.setFields(new ResourceFieldSchema[] { subFieldSchema });

    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setName(PIG_TUPLE_WRAPPER);
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(singleFieldSchema);
    return tupleField;
}
 
Example 14
Source File: SchemaUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a type into a pig field schema, attaching a nested schema for non-primitive types.
 *
 * @param type the type to convert
 * @return the field schema with its pig type set, plus a nested schema when
 *     {@code type} is not primitive
 * @throws IOException if the type or its nested schema cannot be converted
 */
private static ResourceFieldSchema convert(Type type) throws IOException {
  ResourceFieldSchema field = new ResourceFieldSchema();
  field.setType(convertType(type));

  if (type.isPrimitiveType()) {
    return field;
  }

  field.setSchema(convertComplex(type));
  return field;
}
 
Example 15
Source File: PigTuple.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces {@code schemaField} with a new TUPLE field schema wrapping the given schema.
 *
 * @param schema the schema to attach to the new tuple field
 * @throws EsHadoopIllegalStateException if the field schema rejects {@code schema}
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    // The field is published before the schema is attached, so it stays assigned on failure.
    schemaField = tupleField;
    tupleField.setType(DataType.TUPLE);
    try {
        tupleField.setSchema(schema);
    } catch (IOException cause) {
        String message = String.format("Cannot use schema [%s]", schema);
        throw new EsHadoopIllegalStateException(message, cause);
    }
}
 
Example 16
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an avro schema, together with a field name, into a pig field schema.
 *
 * @param in the avro schema to convert
 * @param fieldName the name given to the resulting pig field (may be null)
 * @param visitedRecords records currently being converted further up the recursion; a record
 *     found in this set is mapped to BYTEARRAY instead of being expanded again
 * @return the pig field schema for {@code in}
 * @throws IOException for an unacceptable union or an unsupported avro type
 */
private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords)
        throws IOException {

    AvroStorageLog.details("InConvert avro schema with field name " + fieldName);

    Schema.Type avroType = in.getType();
    ResourceFieldSchema pigField = new ResourceFieldSchema();
    pigField.setName(fieldName);

    switch (avroType) {
        case RECORD: {
            AvroStorageLog.details("convert to a pig tuple");

            if (visitedRecords.contains(in)) {
                // Already expanding this record higher up: do not recurse again.
                pigField.setType(DataType.BYTEARRAY);
                break;
            }

            visitedRecords.add(in);
            pigField.setType(DataType.TUPLE);

            List<Schema.Field> avroFields = in.getFields();
            ResourceFieldSchema[] pigChildren = new ResourceFieldSchema[avroFields.size()];
            int position = 0;
            for (Schema.Field avroField : avroFields) {
                pigChildren[position] = inconvert(avroField.schema(), avroField.name(), visitedRecords);
                position++;
            }

            ResourceSchema tupleSchema = new ResourceSchema();
            tupleSchema.setFields(pigChildren);
            pigField.setSchema(tupleSchema);
            visitedRecords.remove(in);
            break;
        }
        case ARRAY: {
            AvroStorageLog.details("convert array to a pig bag");
            pigField.setType(DataType.BAG);
            ResourceFieldSchema elementField =
                    inconvert(in.getElementType(), ARRAY_FIELD, visitedRecords);
            add2BagSchema(pigField, elementField);
            break;
        }
        case MAP:
            AvroStorageLog.details("convert map to a pig map");
            pigField.setType(DataType.MAP);
            break;
        case UNION: {
            if (!AvroStorageUtils.isAcceptableUnion(in)) {
                throw new IOException("Do not support generic union:" + in);
            }
            // An acceptable union is converted as its accepted member type.
            Schema accepted = AvroStorageUtils.getAcceptedType(in);
            ResourceFieldSchema acceptedField = inconvert(accepted, null, visitedRecords);
            pigField.setType(acceptedField.getType());
            pigField.setSchema(acceptedField.getSchema());
            break;
        }
        case FIXED:
        case BYTES:
            pigField.setType(DataType.BYTEARRAY);
            break;
        case BOOLEAN:
            pigField.setType(DataType.BOOLEAN);
            break;
        case DOUBLE:
            pigField.setType(DataType.DOUBLE);
            break;
        case ENUM:
        case STRING:
            pigField.setType(DataType.CHARARRAY);
            break;
        case FLOAT:
            pigField.setType(DataType.FLOAT);
            break;
        case INT:
            pigField.setType(DataType.INTEGER);
            break;
        case LONG:
            pigField.setType(DataType.LONG);
            break;
        case NULL:
            // value of NULL is always NULL
            pigField.setType(DataType.INTEGER);
            break;
        default:
            throw new IOException("Unsupported avro type:" + avroType);
    }
    return pigField;
}
 
Example 17
Source File: TestStore.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Stores a randomly generated tuple that contains nulls, reads the stored text back line by
 * line, re-parses every tab-separated field with the PigStorage load caster, and checks that
 * the rebuilt tuple equals the input tuple. An empty text field is read back as null.
 */
@Test
public void testStoreComplexDataWithNull() throws Exception {
    Tuple inputTuple = GenRandomData.genRandSmallBagTextTupleWithNulls(new Random(), 10, 100);
    inpDB = DefaultBagFactory.getInstance().newDefaultBag();
    inpDB.add(inputTuple);
    storeAndCopyLocally(inpDB);
    PigStorage ps = new PigStorage("\t");

    // The schemas used for parsing do not depend on the line, so build them once up front.
    ResourceFieldSchema stringfs = new ResourceFieldSchema();
    stringfs.setType(DataType.CHARARRAY);
    ResourceFieldSchema intfs = new ResourceFieldSchema();
    intfs.setType(DataType.INTEGER);
    ResourceFieldSchema bytefs = new ResourceFieldSchema();
    bytefs.setType(DataType.BYTEARRAY);

    ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
    ResourceFieldSchema tuplefs = new ResourceFieldSchema();
    tuplefs.setSchema(tupleSchema);
    tuplefs.setType(DataType.TUPLE);

    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
    ResourceFieldSchema bagfs = new ResourceFieldSchema();
    bagfs.setSchema(bagSchema);
    bagfs.setType(DataType.BAG);

    ResourceSchema mapSchema = new ResourceSchema();
    mapSchema.setFields(new ResourceFieldSchema[]{bytefs});
    ResourceFieldSchema mapfs = new ResourceFieldSchema();
    mapfs.setSchema(mapSchema);
    mapfs.setType(DataType.MAP);

    BufferedReader br = new BufferedReader(new FileReader(outputFileName));
    try {
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            System.err.println("Complex data: ");
            System.err.println(line);
            // Limit -1 keeps trailing empty fields, which stand for nulls.
            String[] flds = line.split("\t", -1);
            Tuple t = new DefaultTuple();

            t.append(!flds[0].isEmpty() ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(!flds[1].isEmpty() ? new DataByteArray(flds[1].getBytes()) : null);
            t.append(!flds[2].isEmpty() ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(!flds[3].isEmpty() ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
            t.append(!flds[4].isEmpty() ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
            t.append(!flds[5].isEmpty() ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
            t.append(!flds[6].isEmpty() ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
            t.append(!flds[7].isEmpty() ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
            t.append(!flds[8].isEmpty() ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
            t.append(!flds[9].isEmpty() ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
            t.append(!flds[10].isEmpty() ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
            // Field 11 is parsed from its own bytes (previously it re-read field 10).
            t.append(!flds[11].isEmpty() ? ps.getLoadCaster().bytesToCharArray(flds[11].getBytes()) : null);
            assertEquals(inputTuple, t);
        }
    } finally {
        // Close the reader even when parsing or the assertion fails.
        br.close();
    }
}
 
Example 18
Source File: GenRandomData.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a TUPLE field schema with eleven fields, in the order bag, bytearray, chararray,
 * double, float, int, long, map, tuple, boolean, datetime.
 *
 * <p>The bag field comes from {@code getSmallTupDataBagFieldSchema()} and the tuple field
 * from {@code getSmallTupleFieldSchema()}; the map field carries a type but no inner schema.
 *
 * @return the TUPLE-typed field schema
 * @throws IOException if assembling the nested schema fails
 */
public static ResourceFieldSchema getSmallBagTextTupleFieldSchema() throws IOException{
    // Nested fields are delegated to the sibling generators (bag first, then tuple).
    ResourceFieldSchema nestedBag = getSmallTupDataBagFieldSchema();
    ResourceFieldSchema nestedTuple = getSmallTupleFieldSchema();

    ResourceFieldSchema byteArrayField = new ResourceFieldSchema();
    byteArrayField.setType(DataType.BYTEARRAY);

    ResourceFieldSchema charArrayField = new ResourceFieldSchema();
    charArrayField.setType(DataType.CHARARRAY);

    ResourceFieldSchema doubleField = new ResourceFieldSchema();
    doubleField.setType(DataType.DOUBLE);

    ResourceFieldSchema floatField = new ResourceFieldSchema();
    floatField.setType(DataType.FLOAT);

    ResourceFieldSchema integerField = new ResourceFieldSchema();
    integerField.setType(DataType.INTEGER);

    ResourceFieldSchema longField = new ResourceFieldSchema();
    longField.setType(DataType.LONG);

    ResourceFieldSchema mapField = new ResourceFieldSchema();
    mapField.setType(DataType.MAP);

    ResourceFieldSchema booleanField = new ResourceFieldSchema();
    booleanField.setType(DataType.BOOLEAN);

    ResourceFieldSchema dateTimeField = new ResourceFieldSchema();
    dateTimeField.setType(DataType.DATETIME);

    ResourceFieldSchema[] layout = new ResourceFieldSchema[] {
        nestedBag, byteArrayField, charArrayField, doubleField, floatField,
        integerField, longField, mapField, nestedTuple, booleanField, dateTimeField
    };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(layout);

    ResourceFieldSchema result = new ResourceFieldSchema();
    result.setSchema(innerSchema);
    result.setType(DataType.TUPLE);
    return result;
}
 
Example 19
Source File: SchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the nested pig schema for a struct, list or map type.
 *
 * <ul>
 *   <li>STRUCT: one field per nested field of the struct.</li>
 *   <li>LIST: the converted element field, wrapped in a TUPLE field unless the element is
 *       itself a struct.</li>
 *   <li>MAP: the converted value field; the key type must be STRING.</li>
 * </ul>
 *
 * @param type the complex type to convert
 * @return the schema holding the converted child fields
 * @throws IOException if a child cannot be converted, the map key is not a string, or
 *     {@code type} is not a struct, list or map
 */
private static ResourceSchema convertComplex(Type type) throws IOException {
  ResourceSchema schema = new ResourceSchema();
  ResourceFieldSchema[] children;

  switch (type.typeId()) {
    case STRUCT: {
      StructType struct = type.asStructType();
      List<ResourceFieldSchema> members = Lists.newArrayList();
      for (Types.NestedField member : struct.fields()) {
        members.add(convert(member));
      }
      children = members.toArray(new ResourceFieldSchema[0]);
      break;
    }
    case LIST: {
      ListType list = type.asListType();
      ResourceFieldSchema[] element = new ResourceFieldSchema[]{convert(list.elementType())};

      if (list.elementType().isStructType()) {
        children = element;
      } else {
        //Wrap non-struct types in tuples
        ResourceSchema wrapperSchema = new ResourceSchema();
        wrapperSchema.setFields(element);

        ResourceFieldSchema wrapper = new ResourceFieldSchema();
        wrapper.setType(DataType.TUPLE);
        wrapper.setSchema(wrapperSchema);

        children = new ResourceFieldSchema[]{wrapper};
      }
      break;
    }
    case MAP: {
      MapType map = type.asMapType();
      if (map.keyType().typeId() != Type.TypeID.STRING) {
        throw new FrontendException("Unsupported map key type: " + map.keyType());
      }
      children = new ResourceFieldSchema[]{convert(map.valueType())};
      break;
    }
    default:
      throw new FrontendException("Unsupported complex type: " + type);
  }

  schema.setFields(children);
  return schema;
}
 
Example 20
Source File: SchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the nested pig schema for a struct, list or map type.
 *
 * <ul>
 *   <li>STRUCT: one field per nested field of the struct.</li>
 *   <li>LIST: the converted element field, wrapped in a TUPLE field unless the element is
 *       itself a struct.</li>
 *   <li>MAP: the converted value field; the key type must be STRING.</li>
 * </ul>
 *
 * @param type the complex type to convert
 * @return the schema holding the converted child fields
 * @throws IOException if a child cannot be converted, the map key is not a string, or
 *     {@code type} is not a struct, list or map
 */
private static ResourceSchema convertComplex(Type type) throws IOException {
  ResourceSchema schema = new ResourceSchema();
  ResourceFieldSchema[] children;

  switch (type.typeId()) {
    case STRUCT: {
      Types.StructType struct = type.asStructType();
      List<ResourceFieldSchema> members = Lists.newArrayList();
      for (Types.NestedField member : struct.fields()) {
        members.add(convert(member));
      }
      children = members.toArray(new ResourceFieldSchema[0]);
      break;
    }
    case LIST: {
      Types.ListType list = type.asListType();
      ResourceFieldSchema[] element = new ResourceFieldSchema[]{convert(list.elementType())};

      if (list.elementType().isStructType()) {
        children = element;
      } else {
        //Wrap non-struct types in tuples
        ResourceSchema wrapperSchema = new ResourceSchema();
        wrapperSchema.setFields(element);

        ResourceFieldSchema wrapper = new ResourceFieldSchema();
        wrapper.setType(DataType.TUPLE);
        wrapper.setSchema(wrapperSchema);

        children = new ResourceFieldSchema[]{wrapper};
      }
      break;
    }
    case MAP: {
      Types.MapType map = type.asMapType();
      if (map.keyType().typeId() != Type.TypeID.STRING) {
        throw new FrontendException("Unsupported map key type: " + map.keyType());
      }
      children = new ResourceFieldSchema[]{convert(map.valueType())};
      break;
    }
    default:
      throw new FrontendException("Unsupported complex type: " + type);
  }

  schema.setFields(children);
  return schema;
}