Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema#getFields()
The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema#getFields().
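Before the examples, here is a minimal, self-contained sketch of the call itself. The class name and the hand-built schema below are illustrative only; in real code the Schema typically comes from a LoadFunc, an EvalFunc's input, or PigServer.dumpSchema(), as the examples that follow show.

import java.util.List;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

public class GetFieldsSketch {
    public static void main(String[] args) throws Exception {
        // Build a small schema by hand for illustration.
        Schema schema = new Schema();
        schema.add(new FieldSchema("id", DataType.INTEGER));
        schema.add(new FieldSchema("name", DataType.CHARARRAY));

        // getFields() returns the fields as a List<FieldSchema>; each entry
        // carries an alias, a type, and (for complex types) a nested schema.
        List<FieldSchema> fields = schema.getFields();
        for (FieldSchema fs : fields) {
            System.out.println(fs.alias + " : " + DataType.findTypeName(fs.type));
        }
    }
}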
Example 1
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0
/**
 * Validates the schema returned when specific columns of a table are given as part of LOAD.
 * @throws Exception
 */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {

    //create the table
    final String ddl = "CREATE TABLE " + TABLE_FULL_NAME
            + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
    conn.createStatement().execute(ddl);

    final String selectColumns = "ID,NAME";
    pigServer.registerQuery(String.format(
            "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE_FULL_NAME, selectColumns, zkQuorum));

    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(2, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
    assertTrue(fields.get(0).type == DataType.INTEGER);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME"));
    assertTrue(fields.get(1).type == DataType.CHARARRAY);
}
Example 2
Source File: PigUtils.java From elasticsearch-hadoop with Apache License 2.0
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
    for (FieldSchema field : schema.getFields()) {
        String node;
        if (field.alias != null) {
            // if no field
            node = fa.toES(field.alias);
            node = (currentNode != null ? currentNode + "." + node : node);
        }
        else {
            node = currentNode;
        }

        // && field.type != DataType.TUPLE
        if (field.schema != null) {
            addField(field.schema, fields, fa, node);
        }
        else {
            if (!StringUtils.hasText(node)) {
                LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema);
            }

            if (node != null) {
                fields.add(fa.toES(node));
            }
        }
    }
}
Example 3
Source File: ZipBags.java From datafu with Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    Schema bagTupleSchema = new Schema();
    Set<String> aliasSet = new HashSet<String>();
    for (FieldSchema schema : input.getFields()) { //Each field should be a bag
        if (schema.schema == null) throw new RuntimeException("Inner bag schemas are null");
        for (FieldSchema innerBagTuple : schema.schema.getFields()) {
            for (FieldSchema tupleField : innerBagTuple.schema.getFields()) {
                if (!aliasSet.add(tupleField.alias)) {
                    throw new RuntimeException("Duplicate field alias specified");
                }
                bagTupleSchema.add(tupleField);
            }
        }
    }
    try {
        return new Schema(new FieldSchema("zipped", bagTupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
Example 4
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0
private void insertCastsForUDF(UserFuncExpression func, Schema fromSch, Schema toSch, SchemaType toSchType)
        throws FrontendException {
    List<FieldSchema> fsLst = fromSch.getFields();
    List<FieldSchema> tsLst = toSch.getFields();
    List<LogicalExpression> args = func.getArguments();
    int i = -1;
    for (FieldSchema fFSch : fsLst) {
        ++i;
        //if we get to the vararg field (if defined) : take it repeatedly
        FieldSchema tFSch = ((toSchType == SchemaType.VARARG) && i >= tsLst.size())
                ? tsLst.get(tsLst.size() - 1) : tsLst.get(i);
        if (fFSch.type == tFSch.type) {
            continue;
        }
        insertCast(func, Util.translateFieldSchema(tFSch), args.get(i));
    }
}
Example 5
Source File: TOBAG.java From spork with Apache License 2.0
@Override
public Schema outputSchema(Schema inputSch) {
    byte type = DataType.ERROR;
    Schema innerSchema = null;
    if (inputSch != null) {
        for (FieldSchema fs : inputSch.getFields()) {
            if (type == DataType.ERROR) {
                type = fs.type;
                innerSchema = fs.schema;
            } else {
                if (type != fs.type || !nullEquals(innerSchema, fs.schema)) {
                    // invalidate the type
                    type = DataType.ERROR;
                    break;
                }
            }
        }
    }
    try {
        if (type == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
        Schema innerSch = new Schema(innerFs);
        Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
        return bagSchema;
    } catch (FrontendException e) {
        //This should not happen
        throw new RuntimeException("Bug : exception thrown while "
                + "creating output schema for TOBAG udf", e);
    }
}
Example 6
Source File: AliasableEvalFunc.java From datafu with Apache License 2.0
private void constructFieldAliases(Map<String, Integer> aliases, Schema tupleSchema, String prefix) {
    int position = 0;
    for (Schema.FieldSchema field : tupleSchema.getFields()) {
        String alias = getPrefixedAliasName(prefix, field.alias);
        if (field.alias != null && !field.alias.equals("null")) {
            aliases.put(alias, position);
            log.debug("In instance: " + getInstanceName() + ", stored alias " + alias + " as position " + position);
        }
        if (field.schema != null) {
            constructFieldAliases(aliases, field.schema, alias);
        }
        position++;
    }
}
Example 7
Source File: SchemaTupleFactory.java From spork with Apache License 2.0
/**
 * This method inspects a Schema to see whether or
 * not a SchemaTuple implementation can be generated
 * for the types present. Currently, bags and maps
 * are not supported.
 * @param s as Schema
 * @return boolean type value, true if it is generatable
 */
public static boolean isGeneratable(Schema s) {
    if (s == null || s.size() == 0) {
        return false;
    }

    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.type == DataType.TUPLE && !isGeneratable(fs.schema)) {
            return false;
        }
    }

    return true;
}
Example 8
Source File: SchemaTupleFrontend.java From spork with Apache License 2.0
private static void stripAliases(Schema s) {
    for (Schema.FieldSchema fs : s.getFields()) {
        fs.alias = null;
        if (fs.schema != null) {
            stripAliases(fs.schema);
        }
    }
}
Example 9
Source File: TOBAG2.java From spork with Apache License 2.0
@Override
public Schema outputSchema(Schema inputSch) {
    byte type = DataType.ERROR;
    Schema innerSchema = null;
    if (inputSch != null) {
        for (FieldSchema fs : inputSch.getFields()) {
            if (type == DataType.ERROR) {
                type = fs.type;
                innerSchema = fs.schema;
            } else {
                if (type != fs.type || !nullEquals(innerSchema, fs.schema)) {
                    // invalidate the type
                    type = DataType.ERROR;
                    break;
                }
            }
        }
    }
    try {
        if (type == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
        Schema innerSch = new Schema(innerFs);
        Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
        return bagSchema;
    } catch (FrontendException e) {
        //This should not happen
        throw new RuntimeException("Bug : exception thrown while "
                + "creating output schema for TOBAG udf", e);
    }
}
Example 10
Source File: RubySchema.java From spork with Apache License 2.0
/**
 * This allows the users to set an index or a range of values to
 * a specified RubySchema. The first argument must be a Fixnum or Range,
 * and the second argument may optionally be a Fixnum. The given index
 * (or range of indices) will be replaced by a RubySchema instantiated
 * based on the remaining arguments.
 *
 * @param context the context the method is being executed in
 * @param args a varargs which has to be at least length two.
 * @return the RubySchema that was added
 */
@JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true)
public RubySchema set(ThreadContext context, IRubyObject[] args) {
    IRubyObject arg1 = args[0];
    IRubyObject arg2 = args[1];
    IRubyObject[] arg3 = Arrays.copyOfRange(args, 1, args.length);
    Schema s = internalSchema;
    Ruby runtime = context.getRuntime();
    List<Schema.FieldSchema> lfs = s.getFields();
    int min, max;
    if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = (int)((RubyFixnum)arg2).getLongValue();
        arg3 = Arrays.copyOfRange(args, 2, args.length);
    } else if (arg1 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = min + 1;
    } else if (arg1 instanceof RubyRange) {
        min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue();
        max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1;
    } else {
        throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString() + " )");
    }
    for (int i = min; i < max; i++)
        lfs.remove(min);
    if (arg3 == null || arg3.length == 0)
        throw new RuntimeException("Must have schema argument for []=");
    RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(arg3);
    for (Schema.FieldSchema fs : rs.getInternalSchema().getFields())
        lfs.add(min++, fs);
    RubySchema.fixSchemaNames(internalSchema);
    return rs;
}
Example 11
Source File: TestResourceSchema.java From spork with Apache License 2.0
private boolean CheckTwoLevelAccess(Schema s) {
    if (s == null) return false;
    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.type == DataType.BAG
                && fs.schema != null
                && fs.schema.isTwoLevelAccessRequired()) {
            return true;
        }
        if (CheckTwoLevelAccess(fs.schema)) return true;
    }
    return false;
}
Example 12
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0
private void convertToElephantBirdCompatibleSchema(Schema schema) {
    if (schema == null) {
        return;
    }
    for (FieldSchema fieldSchema : schema.getFields()) {
        if (fieldSchema.type == DataType.BOOLEAN) {
            fieldSchema.type = DataType.INTEGER;
        }
        convertToElephantBirdCompatibleSchema(fieldSchema.schema);
    }
}
Example 13
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
private Type[] convertTypes(Schema pigSchema) {
    List<FieldSchema> fields = pigSchema.getFields();
    Type[] types = new Type[fields.size()];
    for (int i = 0; i < types.length; i++) {
        types[i] = convert(fields.get(i), i);
    }
    return types;
}
Example 14
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
    List<FieldSchema> fields = requestedPigSchema.getFields();
    List<Type> newFields = new ArrayList<Type>();
    for (int i = 0; i < fields.size(); i++) {
        FieldSchema fieldSchema = fields.get(i);
        String name = name(fieldSchema.alias, "field_" + i);
        if (schemaToFilter.containsField(name)) {
            newFields.add(filter(schemaToFilter.getType(name), fieldSchema));
        }
    }
    return newFields;
}
Example 15
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
    List<Type> newFields = new ArrayList<Type>();
    List<Pair<FieldSchema, Integer>> indexedFields = new ArrayList<Pair<FieldSchema, Integer>>();

    try {
        if (requiredFieldsList == null) {
            int index = 0;
            for (FieldSchema fs : pigSchema.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(fs, index++));
            }
        } else {
            for (RequiredField rf : requiredFieldsList.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(pigSchema.getField(rf.getAlias()), rf.getIndex()));
            }
        }

        for (Pair<FieldSchema, Integer> p : indexedFields) {
            FieldSchema fieldSchema = pigSchema.getField(p.first.alias);
            if (p.second < schemaToFilter.getFieldCount()) {
                Type type = schemaToFilter.getFields().get(p.second);
                newFields.add(filter(type, fieldSchema));
            }
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Failed to filter requested fields", e);
    }
    return newFields;
}
Example 16
Source File: TestSchemaTuple.java From spork with Apache License 2.0
private void checkNullGetThrowsError(SchemaTuple<?> st) throws ExecException {
    Schema schema = st.getSchema();
    int i = 0;
    for (Schema.FieldSchema fs : schema.getFields()) {
        boolean fieldIsNull = false;

        try {
            switch (fs.type) {
            case DataType.BIGDECIMAL: st.getBigDecimal(i); break;
            case DataType.BIGINTEGER: st.getBigInteger(i); break;
            case DataType.BOOLEAN: st.getBoolean(i); break;
            case DataType.BYTEARRAY: st.getBytes(i); break;
            case DataType.CHARARRAY: st.getString(i); break;
            case DataType.INTEGER: st.getInt(i); break;
            case DataType.LONG: st.getLong(i); break;
            case DataType.FLOAT: st.getFloat(i); break;
            case DataType.DOUBLE: st.getDouble(i); break;
            case DataType.DATETIME: st.getDateTime(i); break;
            case DataType.TUPLE: st.getTuple(i); break;
            case DataType.BAG: st.getDataBag(i); break;
            case DataType.MAP: st.getMap(i); break;
            default: throw new RuntimeException("Unsupported FieldSchema in SchemaTuple: " + fs);
            }
        } catch (FieldIsNullException e) {
            fieldIsNull = true;
        }

        assertTrue(fieldIsNull);

        i++;
    }
}
Example 17
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0
/**
 * Validates the schema returned when a SQL SELECT query is given as part of LOAD.
 * @throws Exception
 */
@Test
public void testSchemaForQuery() throws Exception {

    //create the table.
    String ddl = String.format("CREATE TABLE " + TABLE_FULL_NAME +
            " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" +
            " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))\n", TABLE_FULL_NAME);
    conn.createStatement().execute(ddl);

    //sql query for LOAD
    final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE_FULL_NAME;
    pigServer.registerQuery(String.format(
            "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            sqlQuery, zkQuorum));

    //assert the schema.
    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(3, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
    assertTrue(fields.get(0).type == DataType.CHARARRAY);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer"));
    assertTrue(fields.get(1).type == DataType.INTEGER);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double"));
    assertTrue(fields.get(2).type == DataType.DOUBLE);
}
Example 18
Source File: WeightedReservoirSample.java From datafu with Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema inputFieldSchema = input.getField(0);

        if (inputFieldSchema.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema inputBagSchema = inputFieldSchema.schema;

        if (inputBagSchema.getField(0).type != DataType.TUPLE) {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                    DataType.findTypeName(inputBagSchema.getField(0).type)));
        }

        Schema tupleSchema = inputBagSchema.getField(0).schema;

        if (tupleSchema == null) {
            throw new RuntimeException("The tuple of input bag has no schema");
        }

        List<Schema.FieldSchema> fieldSchemaList = tupleSchema.getFields();

        if (fieldSchemaList == null || fieldSchemaList.size() <= Math.max(0, this.weightIdx)) {
            throw new RuntimeException("The field schema of the input tuple is null "
                    + "or the tuple size is no more than the weight field index: " + this.weightIdx);
        }

        if (fieldSchemaList.get(this.weightIdx).type != DataType.INTEGER
                && fieldSchemaList.get(this.weightIdx).type != DataType.LONG
                && fieldSchemaList.get(this.weightIdx).type != DataType.FLOAT
                && fieldSchemaList.get(this.weightIdx).type != DataType.DOUBLE) {
            String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                                   DataType.findTypeName(DataType.LONG),
                                                   DataType.findTypeName(DataType.FLOAT),
                                                   DataType.findTypeName(DataType.DOUBLE)};
            throw new RuntimeException("Expect the type of the weight field of the input tuple to be of ("
                    + java.util.Arrays.toString(expectedTypes) + "), but instead found ("
                    + DataType.findTypeName(fieldSchemaList.get(this.weightIdx).type) + "), weight field: "
                    + this.weightIdx);
        }

        return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                                 inputFieldSchema.schema, DataType.BAG));
    } catch (FrontendException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
Example 19
Source File: Coalesce.java From datafu with Apache License 2.0
@Override
public Schema getOutputSchema(Schema input) {
    if (input.getFields().size() == 0) {
        throw new RuntimeException("Expected at least one parameter");
    }

    Byte outputType = null;
    int pos = 0;
    for (FieldSchema field : input.getFields()) {
        if (DataType.isSchemaType(field.type)) {
            throw new RuntimeException(String.format("Not supported on schema types. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (DataType.isComplex(field.type)) {
            throw new RuntimeException(String.format("Not supported on complex types. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (!DataType.isUsableType(field.type)) {
            throw new RuntimeException(String.format("Not a usable type. Found %s in position %d.",
                    DataType.findTypeName(field.type), pos));
        }
        if (outputType == null) {
            outputType = field.type;
        } else if (!outputType.equals(field.type)) {
            if (strict) {
                throw new RuntimeException(String.format(
                        "Expected all types to be equal, but found '%s' in position %d. First element has type '%s'. "
                        + "If you'd like to attempt merging types, use the '%s' option, as '%s' is the default.",
                        DataType.findTypeName(field.type), pos, DataType.findTypeName((byte) outputType),
                        LAZY_OPTION, STRICT_OPTION));
            } else {
                byte merged = DataType.mergeType(outputType, field.type);
                if (merged == DataType.ERROR) {
                    throw new RuntimeException(String.format(
                            "Expected all types to be equal, but found '%s' in position %d, where output type is '%s', and types could not be merged.",
                            DataType.findTypeName(field.type), pos, DataType.findTypeName((byte) outputType)));
                }
                outputType = merged;
            }
        }
        pos++;
    }

    getInstanceProperties().put("type", outputType);

    return new Schema(new Schema.FieldSchema("item", outputType));
}
Example 20
Source File: TestProjectRange.java From spork with Apache License 2.0
private void setAliasesToNull(Schema schema) {
    for (FieldSchema fs : schema.getFields()) {
        fs.alias = null;
    }
}