org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema Java Examples
The following examples show how to use
org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Builds a schema containing only the fields named in {@code fieldList}, recursing into a
 * field's inner schema whenever the required field lists sub-fields.
 *
 * @param schema    schema in which each required field is looked up by alias
 * @param fieldList the required fields to keep
 * @return the pruned schema, or {@code null} if pruning a nested schema produced {@code null}
 * @throws FrontendException if a field schema cannot be cloned, or as propagated by the
 *                           schema lookup / recursive call
 */
private Schema getSchemaFromRequiredFieldList(Schema schema, List<RequiredField> fieldList)
        throws FrontendException {
    Schema pruned = new Schema();
    for (RequiredField required : fieldList) {
        // Work on a clone so the caller's schema is not modified when the inner schema is swapped.
        FieldSchema copy;
        try {
            copy = schema.getField(required.getAlias()).clone();
        } catch (CloneNotSupportedException e) {
            throw new FrontendException("Clone not supported for the fieldschema", e);
        }

        if (required.getSubFields() != null) {
            Schema nested = getSchemaFromRequiredFieldList(copy.schema, required.getSubFields());
            if (nested == null) {
                return null;
            }
            copy.schema = nested;
        }
        pruned.add(copy);
    }
    return pruned;
}
Example #2
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0 | 6 votes |
/** * Validates the schema returned when specific columns of a table are given as part of LOAD . * @throws Exception */ @Test public void testSchemaForTableWithSpecificColumns() throws Exception { //create the table final String ddl = "CREATE TABLE " + TABLE_FULL_NAME + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) "; conn.createStatement().execute(ddl); final String selectColumns = "ID,NAME"; pigServer.registerQuery(String.format( "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE_FULL_NAME, selectColumns, zkQuorum)); Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID")); assertTrue(fields.get(0).type == DataType.INTEGER); assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME")); assertTrue(fields.get(1).type == DataType.CHARARRAY); }
Example #3
Source File: PigStreamingUDF.java From spork with Apache License 2.0 | 6 votes |
/**
 * Splits the byte range {@code [startIndex, endIndex]} of {@code buf} at field delimiters and
 * deserializes each piece with the matching field of {@code fs.schema}.
 *
 * @param fs         field schema whose inner schema describes the tuple's fields
 * @param buf        serialized bytes
 * @param startIndex first index of the tuple content (inclusive)
 * @param endIndex   last index scanned (inclusive)
 * @return a tuple built from the deserialized field values
 * @throws IOException propagated from the schema lookup or field deserialization
 */
private Tuple deserializeTuple(FieldSchema fs, byte[] buf, int startIndex, int endIndex) throws IOException {
    final Schema innerSchema = fs.schema;
    final ArrayList<Object> values = new ArrayList<Object>(innerSchema.size());

    int nesting = 0;
    int nextField = 0;
    int tokenStart = startIndex;

    int pos = startIndex;
    while (pos <= endIndex) {
        nesting = DELIMS.updateDepth(buf, nesting, pos);
        final boolean atFieldDelim =
                StreamingDelimiters.isDelimiter(DELIMS.getFieldDelim(), buf, pos, nesting, endIndex);
        if (atFieldDelim) {
            values.add(deserialize(innerSchema.getField(nextField), buf, tokenStart, pos - 1));
            // NOTE(review): the +3 presumably skips a three-byte field delimiter — confirm
            // against StreamingDelimiters.
            tokenStart = pos + 3;
            nextField++;
        }
        pos++;
    }
    return tupleFactory.newTupleNoCopy(values);
}
Example #4
Source File: PhoenixHBaseLoaderIT.java From phoenix with Apache License 2.0 | 6 votes |
/**
 * Checks the Pig schema reported when a whole table is loaded: every column must appear
 * in order, with the expected alias and Pig data type.
 *
 * @throws Exception on any failure while creating the table or registering the query
 */
@Test
public void testSchemaForTable() throws Exception {
    final String ddl = String.format("CREATE TABLE %s "
            + " (a_string varchar not null, a_binary varbinary not null, a_integer integer, cf1.a_float float"
            + " CONSTRAINT pk PRIMARY KEY (a_string, a_binary))\n", TABLE_FULL_NAME);
    conn.createStatement().execute(ddl);

    final String loadStatement = String.format(
            "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE_FULL_NAME, zkQuorum);
    pigServer.registerQuery(loadStatement);

    final Schema schema = pigServer.dumpSchema("A");
    final List<FieldSchema> fields = schema.getFields();
    assertEquals(4, fields.size());

    // Expected column aliases and their Pig types, in declaration order.
    final String[] expectedAliases = {"a_string", "a_binary", "a_integer", "a_float"};
    final byte[] expectedTypes = {
        DataType.CHARARRAY, DataType.BYTEARRAY, DataType.INTEGER, DataType.FLOAT
    };
    for (int i = 0; i < expectedAliases.length; i++) {
        assertTrue(fields.get(i).alias.equalsIgnoreCase(expectedAliases[i]));
        assertTrue(fields.get(i).type == expectedTypes[i]);
    }
}
Example #5
Source File: SchemaUtils.java From Cubert with Apache License 2.0 | 6 votes |
public static ColumnType coltypeFromFieldSchema(String colName, FieldSchema colSchema) { ColumnType t = new ColumnType(); t.setName(colName); t.setType(convertoRCFTypeName(DataType.findTypeName(colSchema.type))); if (colSchema.schema != null) { try { t.setColumnSchema(convertToBlockSchema(colSchema.schema)); } catch (FrontendException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return t; }
Example #6
Source File: SchemaUtils.java From Cubert with Apache License 2.0 | 6 votes |
/**
 * Converts a block schema into a Pig {@code Schema}, column by column, recursing into
 * columns that carry a nested column schema.
 *
 * @param blockSchema block schema to convert
 * @return the equivalent Pig schema
 * @throws FrontendException propagated from Pig field-schema construction or the recursive call
 */
public static Schema convertFromBlockSchema(BlockSchema blockSchema) throws FrontendException {
    List<FieldSchema> converted = new ArrayList<FieldSchema>();

    for (int col = 0; col < blockSchema.getNumColumns(); col++) {
        ColumnType column = blockSchema.getColumnType(col);
        byte pigType = convertToPigType(column.getType().toString());

        FieldSchema field;
        if (column.getColumnSchema() == null) {
            field = new FieldSchema(column.getName(), pigType);
        } else {
            Schema nested = convertFromBlockSchema(column.getColumnSchema());
            field = new FieldSchema(column.getName(), nested, pigType);
        }
        converted.add(field);
    }
    return new Schema(converted);
}
Example #7
Source File: ScorePMML_AuditTest.java From Surus with Apache License 2.0 | 6 votes |
private Schema buildAuditInputSchema() throws FrontendException { // Build Field Schema List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(); fieldSchemas.add(new Schema.FieldSchema("id" , DataType.LONG)); fieldSchemas.add(new Schema.FieldSchema("age" , DataType.INTEGER)); fieldSchemas.add(new Schema.FieldSchema("employment" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("education" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("marital" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("occupation" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("income" , DataType.DOUBLE)); fieldSchemas.add(new Schema.FieldSchema("gender" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("deductions" , DataType.DOUBLE)); fieldSchemas.add(new Schema.FieldSchema("hours" , DataType.INTEGER)); fieldSchemas.add(new Schema.FieldSchema("ignore_accounts", DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("risk_adjustment", DataType.INTEGER)); fieldSchemas.add(new Schema.FieldSchema("target_adjusted", DataType.INTEGER)); return new Schema(fieldSchemas); }
Example #8
Source File: PigUtils.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) { for (FieldSchema field : schema.getFields()) { String node; if (field.alias != null) { // if no field node = fa.toES(field.alias); node = (currentNode != null ? currentNode + "." + node : node); } else { node = currentNode; } // && field.type != DataType.TUPLE if (field.schema != null) { addField(field.schema, fields, fa, node); } else { if (!StringUtils.hasText(node)) { LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema); } if (node != null) { fields.add(fa.toES(node)); } } } }
Example #9
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 6 votes |
/**
 * Deserializes {@code buff} into a {@code ColumnarStruct} using a {@code ColumnarSerDe}
 * configured from the column names and types parsed out of {@code schema}.
 *
 * @param buff   serialized columnar row
 * @param schema schema string from which column names and types are parsed
 * @return the deserialized struct
 * @throws SerDeException propagated from SerDe initialization or deserialization
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    final Pattern columnNamePattern = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    final List<String> types = HiveRCSchemaUtil.parseSchemaTypes(schema);
    final List<String> cols = HiveRCSchemaUtil.parseSchema(columnNamePattern, schema);

    // Built exactly as in the original even though it is not read afterwards in this method.
    final List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(cols.size());
    for (int idx = 0; idx < cols.size(); idx++) {
        byte pigType = HiveRCSchemaUtil.findPigDataType(types.get(idx));
        fieldSchemaList.add(new FieldSchema(cols.get(idx), pigType));
    }

    final Properties serdeProps = new Properties();
    serdeProps.setProperty(Constants.LIST_COLUMNS, HiveRCSchemaUtil.listToString(cols));
    serdeProps.setProperty(Constants.LIST_COLUMN_TYPES, HiveRCSchemaUtil.listToString(types));

    final Configuration hiveConf = new HiveConf(conf, SessionState.class);
    final ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(hiveConf, serdeProps);

    return (ColumnarStruct) serde.deserialize(buff);
}
Example #10
Source File: CountDistinctUpTo.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Validates that the input is a single bag field and reports the output schema:
 * one INTEGER field named {@code CountDistinctUpTo}.
 *
 * @param input input schema
 * @return the output schema
 * @throws RuntimeException if the input does not have exactly one field, if that field is
 *                          not a bag, or if the field cannot be retrieved
 */
@Override
public Schema outputSchema(Schema input) {
    if (input.size() != 1) {
        throw new RuntimeException(
                "Expected a single field of type bag, but found " + input.size() + " fields");
    }

    final FieldSchema onlyField;
    try {
        onlyField = input.getField(0);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }

    if (onlyField.type != DataType.BAG) {
        throw new RuntimeException("Expected a bag but got: " + DataType.findTypeName(onlyField.type));
    }

    FieldSchema countField = new FieldSchema("CountDistinctUpTo", DataType.INTEGER);
    return new Schema(countField);
}
Example #11
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Converts a list of Parquet types into a Pig schema. A repeated Parquet type is wrapped in
 * an unnamed BAG field whose inner schema holds the converted field; any other type is
 * added directly.
 *
 * @param parquetFields Parquet types to convert
 * @return the resulting Pig schema
 * @throws SchemaConversionException if a field cannot be converted
 */
private Schema convertFields(List<Type> parquetFields) {
    List<FieldSchema> pigFields = new ArrayList<Schema.FieldSchema>();

    for (Type parquetType : parquetFields) {
        try {
            FieldSchema converted = getFieldSchema(parquetType);
            if (!parquetType.isRepetition(Repetition.REPEATED)) {
                pigFields.add(converted);
                continue;
            }
            Schema bagContents = new Schema(Arrays.asList(converted));
            pigFields.add(new FieldSchema(null, bagContents, DataType.BAG));
        } catch (FrontendException fe) {
            throw new SchemaConversionException("can't convert "+ parquetType, fe);
        }
    }
    return new Schema(pigFields);
}
Example #12
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0 | 6 votes |
private void insertCastsForUDF(UserFuncExpression func, Schema fromSch, Schema toSch, SchemaType toSchType) throws FrontendException { List<FieldSchema> fsLst = fromSch.getFields(); List<FieldSchema> tsLst = toSch.getFields(); List<LogicalExpression> args = func.getArguments(); int i=-1; for (FieldSchema fFSch : fsLst) { ++i; //if we get to the vararg field (if defined) : take it repeatedly FieldSchema tFSch = ((toSchType == SchemaType.VARARG) && i >= tsLst.size()) ? tsLst.get(tsLst.size() - 1) : tsLst.get(i); if (fFSch.type == tFSch.type) { continue; } insertCast(func, Util.translateFieldSchema(tFSch), args.get(i)); } }
Example #13
Source File: Quantile.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Reports the output schema: a single unnamed TUPLE holding one DOUBLE field per configured
 * quantile. Fields are named {@code quantile_<index>} when {@code ordinalOutputSchema} is set,
 * otherwise {@code quantile_<value>} with every {@code "."} in the value replaced by {@code "_"}.
 *
 * @param input input schema (not inspected)
 * @return the output schema
 * @throws RuntimeException wrapping a {@code FrontendException} from building the tuple field
 */
@Override
public Schema outputSchema(Schema input) {
    final Schema quantileFields = new Schema();

    if (!ordinalOutputSchema) {
        for (Double quantile : this.quantiles) {
            String alias = "quantile_" + quantile.toString().replace(".", "_");
            quantileFields.add(new Schema.FieldSchema(alias, DataType.DOUBLE));
        }
    } else {
        for (int ordinal = 0; ordinal < this.quantiles.size(); ordinal++) {
            String alias = "quantile_" + ordinal;
            quantileFields.add(new Schema.FieldSchema(alias, DataType.DOUBLE));
        }
    }

    try {
        FieldSchema tupleField = new FieldSchema(null, quantileFields, DataType.TUPLE);
        return new Schema(tupleField);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
Example #14
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Converts a Pig map field into a Parquet string-keyed map group.
 *
 * @param alias       name of the resulting group
 * @param fieldSchema Pig map field; its inner schema must contain exactly one field
 *                    (the map value)
 * @return an optional group containing one repeated group field (key, value)
 * @throws SchemaConversionException if the inner schema is missing, does not have exactly
 *                                   one field, or that field cannot be retrieved
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
    final Schema valueSchema = fieldSchema.schema;
    final boolean hasSingleValueField = valueSchema != null && valueSchema.size() == 1;
    if (!hasSingleValueField) {
        throw new SchemaConversionException(
                "Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
    }

    final FieldSchema valueField;
    try {
        valueField = valueSchema.getField(0);
    } catch (FrontendException fe) {
        throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
    }

    final Type convertedValue = convertWithName(valueField, "value");
    return ConversionPatterns.stringKeyMapType(
            Repetition.OPTIONAL, alias, name(valueField.alias, "map"), convertedValue);
}
Example #15
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Recursively rewrites every TUPLE field whose inner schema is present but empty so that it
 * contains a single unnamed BYTEARRAY field. Fields without an inner schema are left alone.
 *
 * @param fs field schema to rewrite in place
 */
private static void convertEmptyTupleToBytearrayTuple(FieldSchema fs) {
    final Schema inner = fs.schema;
    if (inner == null) {
        // Nothing to fill in and nothing to descend into.
        return;
    }

    if (fs.type == DataType.TUPLE && inner.size() == 0) {
        inner.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }

    for (FieldSchema child : inner.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}
Example #16
Source File: MarkovPairs.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Reports the output schema: a BAG of tuples with two fields, {@code elem1} and
 * {@code elem2}, each carrying the inner schema of the first field of the input bag.
 *
 * <p>The input-type check is performed outside the best-effort {@code try} blocks. In the
 * previous form the "Expected input schema to be BAG" exception was thrown inside a
 * {@code catch (Exception)} region and was therefore always swallowed into a {@code null}
 * return, so the validation message could never reach the user.
 *
 * @param input input schema; its first field is expected to be a bag
 * @return the output schema, or {@code null} when the schema cannot be determined
 *         (input field unavailable, or the bag's inner schema cannot be read)
 * @throws RuntimeException if the first input field exists but is not a bag
 */
@Override
public Schema outputSchema(Schema input) {
    final FieldSchema bagField;
    try {
        bagField = input.getField(0);
    } catch (Exception e) {
        // Best effort, as before: an unreadable input schema yields an unknown output schema.
        return null;
    }
    if (bagField == null) {
        return null;
    }

    if (bagField.type != DataType.BAG) {
        throw new RuntimeException(String.format("Expected input schema to be BAG, but instead found %s",
                DataType.findTypeName(bagField.type)));
    }

    try {
        Schema tupleSchema = new Schema();
        FieldSchema elementField = bagField.schema.getField(0);
        tupleSchema.add(new Schema.FieldSchema("elem1", elementField.schema));
        tupleSchema.add(new Schema.FieldSchema("elem2", elementField.schema));
        return new Schema(new Schema.FieldSchema(
                getSchemaName(this.getClass().getName().toLowerCase(), input),
                tupleSchema, DataType.BAG));
    } catch (Exception e) {
        // Deliberately broad to keep the original best-effort contract for a bag whose
        // inner schema is missing or unreadable.
        return null;
    }
}
Example #17
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Collects the positions of all BYTEARRAY fields in a schema.
 *
 * @param func UDF expression, used only for error reporting
 * @param s    input schema
 * @return positions (0-based) of the fields whose type is BYTEARRAY, in ascending order
 * @throws VisitorException if a field schema cannot be retrieved
 */
private List<Integer> getByteArrayPositions(UserFuncExpression func, Schema s) throws VisitorException {
    final List<Integer> positions = new ArrayList<Integer>();

    for (int pos = 0; pos < s.size(); pos++) {
        final FieldSchema field;
        try {
            field = s.getField(pos);
        } catch (FrontendException fee) {
            throw new TypeCheckerException(
                    func, "Unable to retrieve field schema.", 1043, PigException.INPUT, fee);
        }
        if (field.type == DataType.BYTEARRAY) {
            positions.add(pos);
        }
    }
    return positions;
}
Example #18
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Scans the schema from the first field onward for a BYTEARRAY field.
 *
 * @param func UDF expression, used only for error reporting
 * @param s    input schema
 * @return {@code true} as soon as a BYTEARRAY field is seen; {@code false} if none is found
 *         or if a {@code null} field schema is encountered first
 * @throws VisitorException if a field schema cannot be retrieved
 */
private boolean byteArrayFound(UserFuncExpression func, Schema s) throws VisitorException {
    int pos = 0;
    while (pos < s.size()) {
        final FieldSchema field;
        try {
            field = s.getField(pos);
        } catch (FrontendException fee) {
            throw new TypeCheckerException(
                    func, "Unable to retrieve field schema.", 1043, PigException.INPUT, fee);
        }

        // NOTE(review): a null field ends the scan with false rather than being skipped, so
        // later byte-array fields are not seen — confirm this is intended.
        if (field == null) {
            return false;
        }
        if (field.type == DataType.BYTEARRAY) {
            return true;
        }
        pos++;
    }
    return false;
}
Example #19
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0 | 6 votes |
@Override public List<String> getPredicateFields(String s, Job job) throws IOException { if(!job.getConfiguration().getBoolean(ENABLE_PREDICATE_FILTER_PUSHDOWN, DEFAULT_PREDICATE_PUSHDOWN_ENABLED)) { return null; } List<String> fields = new ArrayList<String>(); for(FieldSchema field : schema.getFields()) { switch(field.type) { case DataType.BOOLEAN: case DataType.INTEGER: case DataType.LONG: case DataType.FLOAT: case DataType.DOUBLE: case DataType.CHARARRAY: fields.add(field.alias); break; default: // Skip BYTEARRAY, TUPLE, MAP, BAG, DATETIME, BIGINTEGER, BIGDECIMAL break; } } return fields; }
Example #20
Source File: TestTypeCheckingValidatorNewLP.java From spork with Apache License 2.0 | 6 votes |
/**
 * Reports a fixed output schema: one unnamed BAG whose inner schema holds unnamed
 * CHARARRAY, INTEGER and FLOAT fields, in that order.
 *
 * @param input input schema (not inspected)
 * @return the output schema, or {@code null} if the bag field cannot be constructed
 */
@Override
public Schema outputSchema(Schema input) {
    final Schema bagContents = new Schema();
    bagContents.add(new FieldSchema(null, DataType.CHARARRAY));
    bagContents.add(new FieldSchema(null, DataType.INTEGER));
    bagContents.add(new FieldSchema(null, DataType.FLOAT));

    final Schema.FieldSchema bagField;
    try {
        bagField = new Schema.FieldSchema(null, bagContents, DataType.BAG);
    } catch (FrontendException fee) {
        return null;
    }
    return new Schema(bagField);
}
Example #21
Source File: TestPigStreamingUDF.java From spork with Apache License 2.0 | 5 votes |
/**
 * Deserializing the input {@code "|_"} with a CHARARRAY schema must yield a tuple
 * containing the empty string.
 */
@Test
public void testDeserialize__emptyString() throws IOException {
    final byte[] raw = "|_".getBytes();
    final FieldSchema charField = new FieldSchema("", DataType.CHARARRAY);
    final PigStreamingUDF udf = new PigStreamingUDF(charField);

    final Object actual = udf.deserialize(raw, 0, raw.length);

    Assert.assertEquals(tf.newTuple(""), actual);
}
Example #22
Source File: TupleDiff.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Returns the schema of the {@code fieldNum}-th inner field of {@code fieldSchema}, if any.
 *
 * @param fieldSchema enclosing field schema; may be {@code null}
 * @param fieldNum    0-based position of the inner field
 * @return the inner field's schema, or {@code null} when {@code fieldSchema} is {@code null},
 *         has no inner schema, or has fewer than {@code fieldNum + 1} inner fields
 * @throws ExecException     declared for callers; not thrown directly here
 * @throws FrontendException propagated from the schema lookup
 */
private FieldSchema getFieldSchema(FieldSchema fieldSchema, int fieldNum) throws ExecException, FrontendException {
    if (fieldSchema == null) {
        return null;
    }

    Schema schema = fieldSchema.schema;
    if (schema == null) {
        // A field declared without an inner schema has nothing to look up; treat it like
        // the other "no schema available" cases instead of failing with an NPE.
        return null;
    }

    // Compared as fieldNum >= size rather than size < fieldNum + 1 to avoid int overflow.
    return fieldNum >= schema.size() ? null : schema.getField(fieldNum);
}
Example #23
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Rewrites {@code schema} in place, recursively, changing the type of every BOOLEAN field
 * to INTEGER. A {@code null} schema is ignored.
 *
 * @param schema schema to rewrite; may be {@code null}
 */
private void convertToElephantBirdCompatibleSchema(Schema schema) {
    if (schema != null) {
        for (FieldSchema child : schema.getFields()) {
            boolean isBoolean = child.type == DataType.BOOLEAN;
            if (isBoolean) {
                child.type = DataType.INTEGER;
            }
            // The recursive call handles a null inner schema itself.
            convertToElephantBirdCompatibleSchema(child.schema);
        }
    }
}
Example #24
Source File: TestConstructorArgs.java From spork with Apache License 2.0 | 5 votes |
/**
 * Declares the supported argument signatures: a single CHARARRAY argument maps to this
 * class, a single INTEGER argument maps to {@code IntTest}.
 *
 * @return the function specs, CHARARRAY variant first
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    final Schema charArraySignature = new Schema(new FieldSchema(null, DataType.CHARARRAY));
    final FuncSpec charArrayVariant = new FuncSpec(this.getClass().getName(), charArraySignature);

    final Schema integerSignature = new Schema(new FieldSchema(null, DataType.INTEGER));
    final FuncSpec integerVariant = new FuncSpec(IntTest.class.getName(), integerSignature);

    final List<FuncSpec> mapping = new ArrayList<FuncSpec>();
    mapping.add(charArrayVariant);
    mapping.add(integerVariant);
    return mapping;
}
Example #25
Source File: TestPigStreamingUDF.java From spork with Apache License 2.0 | 5 votes |
/**
 * Regression test: a serialized tuple of (chararray, int, long, long) whose first element
 * is the null marker must deserialize to a tuple wrapping (null, 32, 987654321098765432L,
 * 987654321098765432L).
 */
@Test
public void testDeserialize__bug() throws Exception {
    final byte[] raw = "|(_|-_|,_32|,_987654321098765432|,_987654321098765432|)_|_".getBytes();

    // Element types of the tuple, in order; every element has an empty alias.
    final byte[] elementTypes = {
        DataType.CHARARRAY, DataType.INTEGER, DataType.LONG, DataType.LONG
    };
    final List<FieldSchema> elementSchemas = new ArrayList<FieldSchema>();
    for (byte elementType : elementTypes) {
        elementSchemas.add(new FieldSchema("", elementType));
    }
    final FieldSchema tupleField = new FieldSchema("", new Schema(elementSchemas), DataType.TUPLE);
    final PigStreamingUDF udf = new PigStreamingUDF(tupleField);

    final Tuple expectedInner = tf.newTuple(4);
    expectedInner.set(0, null);
    expectedInner.set(1, 32);
    expectedInner.set(2, 987654321098765432L);
    expectedInner.set(3, 987654321098765432L);

    final Object actual = udf.deserialize(raw, 0, raw.length);

    Assert.assertEquals(tf.newTuple(expectedInner), actual);
}
Example #26
Source File: TestPigStreamingUDF.java From spork with Apache License 2.0 | 5 votes |
/**
 * Deserializing the input {@code "true|_"} with a BOOLEAN schema must yield a tuple
 * containing {@code Boolean.TRUE}.
 */
@Test
public void testDeserialize__boolean() throws IOException {
    final byte[] raw = "true|_".getBytes();
    final FieldSchema booleanField = new FieldSchema("", DataType.BOOLEAN);
    final PigStreamingUDF udf = new PigStreamingUDF(booleanField);

    final Object actual = udf.deserialize(raw, 0, raw.length);

    Assert.assertEquals(tf.newTuple(Boolean.TRUE), actual);
}
Example #27
Source File: TypeCheckingExpVisitor.java From spork with Apache License 2.0 | 5 votes |
/**
 * Tells whether {@code fieldSch} is anything other than a bag of schema-less tuples.
 *
 * @param fieldSch field schema to inspect
 * @return {@code false} only when {@code fieldSch} is a BAG with an inner schema whose first
 *         field is a non-null TUPLE without an inner schema; {@code true} in every other case
 * @throws FrontendException propagated from looking up the bag's first field
 */
private static boolean isNotBagWithEmptyTuple(FieldSchema fieldSch) throws FrontendException {
    if (fieldSch.type != DataType.BAG || fieldSch.schema == null) {
        return true;
    }

    final FieldSchema firstInBag = fieldSch.schema.getField(0);
    final boolean bagWithEmptyTuple = firstInBag != null
            && firstInBag.type == DataType.TUPLE
            && firstInBag.schema == null;
    return !bagWithEmptyTuple;
}
Example #28
Source File: LSHFunc.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Fails fast when the first field of the input schema is not a valid vector of the
 * expected dimension.
 *
 * @param input input schema; its first field is checked
 * @throws FrontendException if the first field is not a valid vector, or as propagated
 *                           from the field lookup
 */
private void validateInputSchema(Schema input) throws FrontendException {
    final FieldSchema vectorSchema = input.getField(0);
    final boolean validVector = DataTypeUtil.isValidVector(vectorSchema, getDimension());
    if (validVector) {
        return;
    }
    throw new FrontendException(
            "Invalid vector element: Expected either a tuple or a bag, but found " + vectorSchema);
}
Example #29
Source File: MetricUDF.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Validates the three-field input schema so that inconsistent input fails fast with a
 * descriptive error:
 * <ol>
 *   <li>field 0 must be a valid vector of dimension {@code dim};</li>
 *   <li>field 1 (distance) must be DOUBLE, INTEGER or LONG;</li>
 *   <li>field 2 (points) must be a BAG whose tuples start with a valid vector.</li>
 * </ol>
 *
 * <p>A points bag or tuple declared without an inner schema is reported as a
 * {@code FrontendException}; previously it failed with a {@code NullPointerException}.
 *
 * @param input input schema
 * @throws FrontendException if any of the checks above fails, or as propagated from the
 *                           field lookups
 */
private void validateInputSchema(Schema input) throws FrontendException {
    // Field 0: the query vector.
    FieldSchema vectorSchema = input.getField(0);
    if (!DataTypeUtil.isValidVector(vectorSchema, dim)) {
        throw new FrontendException(
                "Invalid vector element: Expected either a tuple or a bag, but found " + vectorSchema);
    }

    // Field 1: the distance, which must be numeric.
    FieldSchema distanceSchema = input.getField(1);
    if (distanceSchema.type != DataType.DOUBLE
            && distanceSchema.type != DataType.INTEGER
            && distanceSchema.type != DataType.LONG) {
        throw new FrontendException(
                "Invalid distance element: Expected a number, but found " + distanceSchema);
    }

    // Field 2: the bag of candidate points.
    FieldSchema pointsSchema = input.getField(2);
    if (pointsSchema.type != DataType.BAG) {
        throw new FrontendException(
                "Invalid points element: Expected a bag, but found " + pointsSchema);
    }
    if (pointsSchema.schema == null) {
        throw new FrontendException(
                "Invalid points element: Expected a bag of vectors, but found " + pointsSchema);
    }

    FieldSchema tupleInBag = pointsSchema.schema.getField(0);
    if (tupleInBag == null || tupleInBag.schema == null) {
        throw new FrontendException(
                "Invalid points element: Expected a bag of vectors, but found " + tupleInBag);
    }

    FieldSchema vectorInTuple = tupleInBag.schema.getField(0);
    if (vectorInTuple == null) {
        throw new FrontendException(
                "Invalid points element: Expected a bag of vectors, but found " + tupleInBag);
    }
    if (!DataTypeUtil.isValidVector(vectorInTuple, dim)) {
        throw new FrontendException(
                "Invalid points element: Expected a bag of vectors, but found " + vectorInTuple.schema);
    }
}
Example #30
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds a filter predicate comparing column {@code col} with the constant {@code value},
 * choosing the typed column by the Pig type of the field in the loader's schema.
 *
 * <p>BOOLEAN columns support only {@code OP_EQ} and {@code OP_NE}; INTEGER, LONG, FLOAT,
 * DOUBLE and CHARARRAY columns are delegated to {@code op(...)}.
 *
 * @param op    comparison operator
 * @param col   column being filtered
 * @param value constant to compare with
 * @return the filter predicate
 * @throws RuntimeException if the operator is not supported for a boolean column, the field
 *                          type is not supported, or the field lookup fails
 */
private FilterPredicate buildFilter(OpType op, Column col, Const value) {
    final String name = col.getName();
    try {
        final FieldSchema field = schema.getField(name);
        switch (field.type) {
            case DataType.INTEGER:
                return op(op, intColumn(name), value);
            case DataType.LONG:
                return op(op, longColumn(name), value);
            case DataType.FLOAT:
                return op(op, floatColumn(name), value);
            case DataType.DOUBLE:
                return op(op, doubleColumn(name), value);
            case DataType.CHARARRAY:
                return op(op, binaryColumn(name), value);
            case DataType.BOOLEAN:
                // Booleans have no ordering, so only (in)equality is accepted.
                final Operators.BooleanColumn boolCol = booleanColumn(name);
                switch (op) {
                    case OP_EQ:
                        return eq(boolCol, getValue(value, boolCol.getColumnType()));
                    case OP_NE:
                        return notEq(boolCol, getValue(value, boolCol.getColumnType()));
                    default:
                        throw new RuntimeException(
                                "Operation " + op + " not supported for boolean column: " + name);
                }
            default:
                throw new RuntimeException("Unsupported type " + field.type + " for field: " + name);
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Error processing pushdown for column:" + col, e);
    }
}