Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema#getFields()
The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema#getFields().
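Before the examples, here is a minimal, self-contained sketch of the call itself. The class name and the hand-built schema below are illustrative only; in real code the Schema typically comes from a LoadFunc, an EvalFunc's input, or PigServer.dumpSchema(), as the examples that follow show.

import java.util.List;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

public class GetFieldsSketch {
    public static void main(String[] args) throws Exception {
        // Build a small schema by hand for illustration.
        Schema schema = new Schema();
        schema.add(new FieldSchema("id", DataType.INTEGER));
        schema.add(new FieldSchema("name", DataType.CHARARRAY));

        // getFields() returns the fields as a List<FieldSchema>; each entry
        // carries an alias, a type, and (for complex types) a nested schema.
        List<FieldSchema> fields = schema.getFields();
        for (FieldSchema fs : fields) {
            System.out.println(fs.alias + " : " + DataType.findTypeName(fs.type));
        }
    }
}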
Example 1
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0
/**
 * Validates the schema returned when specific columns of a table are given as part of LOAD.
 * @throws Exception
 */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {

    //create the table
    final String ddl = "CREATE TABLE " + TABLE_FULL_NAME
            + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
    conn.createStatement().execute(ddl);

    final String selectColumns = "ID,NAME";
    pigServer.registerQuery(String.format(
            "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE_FULL_NAME, selectColumns, zkQuorum));

    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(2, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
    assertTrue(fields.get(0).type == DataType.INTEGER);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME"));
    assertTrue(fields.get(1).type == DataType.CHARARRAY);
}
Example 2
Source File: PigUtils.java From elasticsearch-hadoop with Apache License 2.0
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
    for (FieldSchema field : schema.getFields()) {
        String node;
        if (field.alias != null) {
            // if no field
            node = fa.toES(field.alias);
            node = (currentNode != null ? currentNode + "." + node : node);
        }
        else {
            node = currentNode;
        }

        // && field.type != DataType.TUPLE
        if (field.schema != null) {
            addField(field.schema, fields, fa, node);
        }
        else {
            if (!StringUtils.hasText(node)) {
                LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema);
            }

            if (node != null) {
                fields.add(fa.toES(node));
            }
        }
    }
}
Example 3
Source File: ZipBags.java From datafu with Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    Schema bagTupleSchema = new Schema();
    Set<String> aliasSet = new HashSet<String>();
    for (FieldSchema schema : input.getFields()) { //Each field should be a bag
        if (schema.schema == null) throw new RuntimeException("Inner bag schemas are null");
        for (FieldSchema innerBagTuple : schema.schema.getFields()) {
            for (FieldSchema tupleField : innerBagTuple.schema.getFields()) {
                if (!aliasSet.add(tupleField.alias)) {
                    throw new RuntimeException("Duplicate field alias specified");
                }
                bagTupleSchema.add(tupleField);
            }
        }
    }
    try {
        return new Schema(new FieldSchema("zipped", bagTupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
Example 4
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0
private void insertCastsForUDF(UserFuncExpression func, Schema fromSch, Schema toSch, SchemaType toSchType)
        throws FrontendException {
    List<FieldSchema> fsLst = fromSch.getFields();
    List<FieldSchema> tsLst = toSch.getFields();
    List<LogicalExpression> args = func.getArguments();
    int i = -1;
    for (FieldSchema fFSch : fsLst) {
        ++i;
        //if we get to the vararg field (if defined) : take it repeatedly
        FieldSchema tFSch = ((toSchType == SchemaType.VARARG) && i >= tsLst.size())
                ? tsLst.get(tsLst.size() - 1) : tsLst.get(i);
        if (fFSch.type == tFSch.type) {
            continue;
        }
        insertCast(func, Util.translateFieldSchema(tFSch), args.get(i));
    }
}
Example 5
Source File: TOBAG.java From spork with Apache License 2.0
@Override
public Schema outputSchema(Schema inputSch) {
    byte type = DataType.ERROR;
    Schema innerSchema = null;
    if (inputSch != null) {
        for (FieldSchema fs : inputSch.getFields()) {
            if (type == DataType.ERROR) {
                type = fs.type;
                innerSchema = fs.schema;
            } else {
                if (type != fs.type || !nullEquals(innerSchema, fs.schema)) {
                    // invalidate the type
                    type = DataType.ERROR;
                    break;
                }
            }
        }
    }
    try {
        if (type == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
        Schema innerSch = new Schema(innerFs);
        Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
        return bagSchema;
    } catch (FrontendException e) {
        //This should not happen
        throw new RuntimeException("Bug : exception thrown while "
                + "creating output schema for TOBAG udf", e);
    }
}
Example 6
Source File: AliasableEvalFunc.java From datafu with Apache License 2.0
private void constructFieldAliases(Map<String, Integer> aliases, Schema tupleSchema, String prefix) {
    int position = 0;
    for (Schema.FieldSchema field : tupleSchema.getFields()) {
        String alias = getPrefixedAliasName(prefix, field.alias);
        if (field.alias != null && !field.alias.equals("null")) {
            aliases.put(alias, position);
            log.debug("In instance: " + getInstanceName() + ", stored alias " + alias + " as position " + position);
        }
        if (field.schema != null) {
            constructFieldAliases(aliases, field.schema, alias);
        }
        position++;
    }
}
Example 7
Source File: SchemaTupleFactory.java From spork with Apache License 2.0
/**
 * This method inspects a Schema to see whether or
 * not a SchemaTuple implementation can be generated
 * for the types present. Currently, bags and maps
 * are not supported.
 * @param s as Schema
 * @return boolean type value, true if it is generatable
 */
public static boolean isGeneratable(Schema s) {
    if (s == null || s.size() == 0) {
        return false;
    }

    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.type == DataType.TUPLE && !isGeneratable(fs.schema)) {
            return false;
        }
    }

    return true;
}
Example 8
Source File: SchemaTupleFrontend.java From spork with Apache License 2.0
private static void stripAliases(Schema s) {
    for (Schema.FieldSchema fs : s.getFields()) {
        fs.alias = null;
        if (fs.schema != null) {
            stripAliases(fs.schema);
        }
    }
}
Example 9
Source File: TOBAG2.java From spork with Apache License 2.0
@Override
public Schema outputSchema(Schema inputSch) {
    byte type = DataType.ERROR;
    Schema innerSchema = null;
    if (inputSch != null) {
        for (FieldSchema fs : inputSch.getFields()) {
            if (type == DataType.ERROR) {
                type = fs.type;
                innerSchema = fs.schema;
            } else {
                if (type != fs.type || !nullEquals(innerSchema, fs.schema)) {
                    // invalidate the type
                    type = DataType.ERROR;
                    break;
                }
            }
        }
    }
    try {
        if (type == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
        Schema innerSch = new Schema(innerFs);
        Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
        return bagSchema;
    } catch (FrontendException e) {
        //This should not happen
        throw new RuntimeException("Bug : exception thrown while "
                + "creating output schema for TOBAG udf", e);
    }
}
Example 10
Source File: RubySchema.java From spork with Apache License 2.0
/**
 * This allows the users to set an index or a range of values to
 * a specified RubySchema. The first argument must be a Fixnum or Range,
 * and the second argument may optionally be a Fixnum. The given index
 * (or range of indices) will be replaced by a RubySchema instantiated
 * based on the remaining arguments.
 *
 * @param context the context the method is being executed in
 * @param args a varargs which has to be at least length two.
 * @return the RubySchema that was added
 */
@JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true)
public RubySchema set(ThreadContext context, IRubyObject[] args) {
    IRubyObject arg1 = args[0];
    IRubyObject arg2 = args[1];
    IRubyObject[] arg3 = Arrays.copyOfRange(args, 1, args.length);
    Schema s = internalSchema;
    Ruby runtime = context.getRuntime();
    List<Schema.FieldSchema> lfs = s.getFields();
    int min, max;
    if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = (int)((RubyFixnum)arg2).getLongValue();
        arg3 = Arrays.copyOfRange(args, 2, args.length);
    } else if (arg1 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = min + 1;
    } else if (arg1 instanceof RubyRange) {
        min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue();
        max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1;
    } else {
        throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString() + " )");
    }
    for (int i = min; i < max; i++)
        lfs.remove(min);
    if (arg3 == null || arg3.length == 0)
        throw new RuntimeException("Must have schema argument for []=");
    RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(arg3);
    for (Schema.FieldSchema fs : rs.getInternalSchema().getFields())
        lfs.add(min++, fs);
    RubySchema.fixSchemaNames(internalSchema);
    return rs;
}
Example 11
Source File: TestResourceSchema.java From spork with Apache License 2.0
private boolean CheckTwoLevelAccess(Schema s) {
    if (s == null) return false;
    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.type == DataType.BAG
                && fs.schema != null
                && fs.schema.isTwoLevelAccessRequired()) {
            return true;
        }
        if (CheckTwoLevelAccess(fs.schema)) return true;
    }
    return false;
}
Example 12
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0
private void convertToElephantBirdCompatibleSchema(Schema schema) {
    if (schema == null) {
        return;
    }
    for (FieldSchema fieldSchema : schema.getFields()) {
        if (fieldSchema.type == DataType.BOOLEAN) {
            fieldSchema.type = DataType.INTEGER;
        }
        convertToElephantBirdCompatibleSchema(fieldSchema.schema);
    }
}
Example 13
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
private Type[] convertTypes(Schema pigSchema) {
    List<FieldSchema> fields = pigSchema.getFields();
    Type[] types = new Type[fields.size()];
    for (int i = 0; i < types.length; i++) {
        types[i] = convert(fields.get(i), i);
    }
    return types;
}
Example 14
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
    List<FieldSchema> fields = requestedPigSchema.getFields();
    List<Type> newFields = new ArrayList<Type>();
    for (int i = 0; i < fields.size(); i++) {
        FieldSchema fieldSchema = fields.get(i);
        String name = name(fieldSchema.alias, "field_" + i);
        if (schemaToFilter.containsField(name)) {
            newFields.add(filter(schemaToFilter.getType(name), fieldSchema));
        }
    }
    return newFields;
}
Example 15
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
    List<Type> newFields = new ArrayList<Type>();
    List<Pair<FieldSchema, Integer>> indexedFields = new ArrayList<Pair<FieldSchema, Integer>>();

    try {
        if (requiredFieldsList == null) {
            int index = 0;
            for (FieldSchema fs : pigSchema.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(fs, index++));
            }
        } else {
            for (RequiredField rf : requiredFieldsList.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(pigSchema.getField(rf.getAlias()), rf.getIndex()));
            }
        }

        for (Pair<FieldSchema, Integer> p : indexedFields) {
            FieldSchema fieldSchema = pigSchema.getField(p.first.alias);
            if (p.second < schemaToFilter.getFieldCount()) {
                Type type = schemaToFilter.getFields().get(p.second);
                newFields.add(filter(type, fieldSchema));
            }
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Failed to filter requested fields", e);
    }
    return newFields;
}
Example 16
Source File: TestSchemaTuple.java From spork with Apache License 2.0
private void checkNullGetThrowsError(SchemaTuple<?> st) throws ExecException {
    Schema schema = st.getSchema();
    int i = 0;
    for (Schema.FieldSchema fs : schema.getFields()) {
        boolean fieldIsNull = false;

        try {
            switch (fs.type) {
            case DataType.BIGDECIMAL: st.getBigDecimal(i); break;
            case DataType.BIGINTEGER: st.getBigInteger(i); break;
            case DataType.BOOLEAN: st.getBoolean(i); break;
            case DataType.BYTEARRAY: st.getBytes(i); break;
            case DataType.CHARARRAY: st.getString(i); break;
            case DataType.INTEGER: st.getInt(i); break;
            case DataType.LONG: st.getLong(i); break;
            case DataType.FLOAT: st.getFloat(i); break;
            case DataType.DOUBLE: st.getDouble(i); break;
            case DataType.DATETIME: st.getDateTime(i); break;
            case DataType.TUPLE: st.getTuple(i); break;
            case DataType.BAG: st.getDataBag(i); break;
            case DataType.MAP: st.getMap(i); break;
            default: throw new RuntimeException("Unsupported FieldSchema in SchemaTuple: " + fs);
            }
        } catch (FieldIsNullException e) {
            fieldIsNull = true;
        }

        assertTrue(fieldIsNull);

        i++;
    }
}
Example 17
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0
/**
 * Validates the schema returned when a SQL SELECT query is given as part of LOAD.
 * @throws Exception
 */
@Test
public void testSchemaForQuery() throws Exception {

    //create the table.
    String ddl = String.format("CREATE TABLE " + TABLE_FULL_NAME +
            " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" +
            " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))\n", TABLE_FULL_NAME);
    conn.createStatement().execute(ddl);

    //sql query for LOAD
    final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE_FULL_NAME;
    pigServer.registerQuery(String.format(
            "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            sqlQuery, zkQuorum));

    //assert the schema.
    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(3, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
    assertTrue(fields.get(0).type == DataType.CHARARRAY);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer"));
    assertTrue(fields.get(1).type == DataType.INTEGER);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double"));
    assertTrue(fields.get(2).type == DataType.DOUBLE);
}
Example 18
Source File: WeightedReservoirSample.java From datafu with Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema inputFieldSchema = input.getField(0);

        if (inputFieldSchema.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema inputBagSchema = inputFieldSchema.schema;

        if (inputBagSchema.getField(0).type != DataType.TUPLE) {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                    DataType.findTypeName(inputBagSchema.getField(0).type)));
        }

        Schema tupleSchema = inputBagSchema.getField(0).schema;

        if (tupleSchema == null) {
            throw new RuntimeException("The tuple of input bag has no schema");
        }

        List<Schema.FieldSchema> fieldSchemaList = tupleSchema.getFields();

        if (fieldSchemaList == null || fieldSchemaList.size() <= Math.max(0, this.weightIdx)) {
            throw new RuntimeException("The field schema of the input tuple is null "
                    + "or the tuple size is no more than the weight field index: " + this.weightIdx);
        }

        if (fieldSchemaList.get(this.weightIdx).type != DataType.INTEGER
                && fieldSchemaList.get(this.weightIdx).type != DataType.LONG
                && fieldSchemaList.get(this.weightIdx).type != DataType.FLOAT
                && fieldSchemaList.get(this.weightIdx).type != DataType.DOUBLE) {
            String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                                   DataType.findTypeName(DataType.LONG),
                                                   DataType.findTypeName(DataType.FLOAT),
                                                   DataType.findTypeName(DataType.DOUBLE)};
            throw new RuntimeException("Expect the type of the weight field of the input tuple to be of ("
                    + java.util.Arrays.toString(expectedTypes) + "), but instead found ("
                    + DataType.findTypeName(fieldSchemaList.get(this.weightIdx).type) + "), weight field: "
                    + this.weightIdx);
        }

        return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                                 inputFieldSchema.schema, DataType.BAG));
    } catch (FrontendException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
Example 19
Source File: Coalesce.java From datafu with Apache License 2.0
@Override
public Schema getOutputSchema(Schema input) {
    if (input.getFields().size() == 0) {
        throw new RuntimeException("Expected at least one parameter");
    }

    Byte outputType = null;
    int pos = 0;
    for (FieldSchema field : input.getFields()) {
        if (DataType.isSchemaType(field.type)) {
            throw new RuntimeException(String.format("Not supported on schema types. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (DataType.isComplex(field.type)) {
            throw new RuntimeException(String.format("Not supported on complex types. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (!DataType.isUsableType(field.type)) {
            throw new RuntimeException(String.format("Not a usable type. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (outputType == null) {
            outputType = field.type;
        } else if (!outputType.equals(field.type)) {
            if (strict) {
                throw new RuntimeException(String.format(
                        "Expected all types to be equal, but found '%s' in position %d. First element has type '%s'. "
                        + "If you'd like to attempt merging types, use the '%s' option, as '%s' is the default.",
                        DataType.findTypeName(field.type), pos, DataType.findTypeName((byte) outputType),
                        LAZY_OPTION, STRICT_OPTION));
            } else {
                byte merged = DataType.mergeType(outputType, field.type);
                if (merged == DataType.ERROR) {
                    throw new RuntimeException(String.format(
                            "Expected all types to be equal, but found '%s' in position %d, where output type is '%s', and types could not be merged.",
                            DataType.findTypeName(field.type), pos, DataType.findTypeName((byte) outputType)));
                }
                outputType = merged;
            }
        }
        pos++;
    }

    getInstanceProperties().put("type", outputType);

    return new Schema(new Schema.FieldSchema("item", outputType));
}
Example 20
Source File: TestProjectRange.java From spork with Apache License 2.0
private void setAliasesToNull(Schema schema) {
    for (FieldSchema fs : schema.getFields()) {
        fs.alias = null;
    }
}