Java Code Examples for org.apache.pig.impl.util.Utils#getSchemaFromString()
The following examples show how to use
org.apache.pig.impl.util.Utils#getSchemaFromString().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestProjectStarRangeInUdf.java From spork with Apache License 2.0 | 6 votes |
/**
 * Runs {@code TOBAG(*, $0 .. $2)} over a relation loaded without a declared
 * schema, then checks both the resulting schema and the produced bags.
 *
 * @throws IOException propagated from the schema comparison or from opening the iterator
 */
@Test
public void testProjMixExpand1NoSchema() throws IOException {
    final String query =
            " l1 = load '" + INP_FILE_5FIELDS + "';"
            + "f = foreach l1 generate TOBAG(*, $0 .. $2) as tt;" ;

    // Parse a placeholder schema, then blank out the innermost field so it
    // carries no alias and the NULL type.
    Schema expectedSchema = Utils.getSchemaFromString("tt : {(NullALias)}");
    expectedSchema.getField(0).schema.getField(0).schema.getField(0).alias = null;
    expectedSchema.getField(0).schema.getField(0).schema.getField(0).type = DataType.NULL;

    compileAndCompareSchema(expectedSchema, query, "f");

    Iterator<Tuple> actualRows = pigServer.openIterator("f");
    String[] expectedRowStrings = {
        "({('10'),('20'),('30'),('40'),('50'),('10'),('20'),('30')})",
        "({('11'),('21'),('31'),('41'),('51'),('11'),('21'),('31')})",
    };
    List<Tuple> expectedRows =
            Util.getTuplesFromConstantTupleStringAsByteArray(expectedRowStrings);
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}
Example 2
Source File: FixedWidthLoader.java From spork with Apache License 2.0 | 6 votes |
@Override public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { // Save reader to use in getNext() this.reader = reader; splitIndex = split.getSplitIndex(); // Get schema from front-end UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature }); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema == null) { throw new IOException("Could not find schema in UDF context"); } schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); requiredFields = (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_SIGNATURE)); if (requiredFields != null) { numRequiredFields = 0; for (int i = 0; i < requiredFields.length; i++) { if (requiredFields[i]) numRequiredFields++; } } }
Example 3
Source File: TestProjectStarExpander.java From spork with Apache License 2.0 | 6 votes |
/**
 * Projects {@code *} twice in one foreach (once with new aliases, once
 * without) and verifies the dumped schema and the output rows.
 *
 * @throws IOException     propagated from the Pig server calls
 * @throws ParserException if the expected schema string cannot be parsed
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    final String script =
            " l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
            + "f = foreach l1 generate * as (aa, bb, cc), *;" ;
    Util.registerMultiLineQuery(server, script);

    // Renamed columns come first, followed by the original three.
    Schema wantedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", wantedSchema, actualSchema);

    String[] wantedRowStrings = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> wantedRows = Util.getTuplesFromConstantTupleStrings(wantedRowStrings);
    Iterator<Tuple> actualRows = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, wantedRows);
}
Example 4
Source File: TestPigServer.java From spork with Apache License 2.0 | 6 votes |
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException { PigServer pig = new PigServer(cluster.getExecType(), properties); pig.registerQuery("A = load 'adata' AS (a: int, b: int);"); //scalar pig.registerQuery("C = FOREACH A GENERATE *;"); String overrideScalar = useScalar ? "C = FILTER A BY b % 2 == 0; " : ""; pig.registerQuery("B = FOREACH (GROUP A BY a) { " + overrideScalar + "D = FILTER A BY b % 2 == 1;" + "GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;" + "};"); Schema dumpedSchema = pig.dumpSchema("B"); Schema expectedSchema = Utils.getSchemaFromString( expectedSchemaStr); assertEquals(expectedSchema, dumpedSchema); }
Example 5
Source File: TestSchema.java From spork with Apache License 2.0 | 5 votes |
@Test // See PIG-730 public void testMergeSchemaWithTwoLevelAccess() throws Exception { // Generate two schemas Schema s1 = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}"); Schema s2 = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}"); s1.getField(0).schema.setTwoLevelAccessRequired(true); s1.getField(0).schema.setTwoLevelAccessRequired(false); Schema s3 = Schema.mergeSchema(s1, s2, true); assertEquals(s3, s2); }
Example 6
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Feeds the text {@code [key1#0.1f]} to the load caster's
 * {@code bytesToMap} with a {@code map[float]} field schema and checks that
 * the single entry is keyed {@code key1} and holds a {@link Float} that
 * prints as {@code 0.1}.
 *
 * @throws Exception propagated from schema parsing or the caster
 */
@Test
public void testMapFloatValueType() throws Exception{
    final String mapText = "[key1#0.1f]";
    Schema mapSchema = Utils.getSchemaFromString("m:map[float]");
    ResourceFieldSchema fieldSchema = new ResourceSchema(mapSchema).getFields()[0];

    Map<String, Object> parsed =
            ps.getLoadCaster().bytesToMap(mapText.getBytes(), fieldSchema);

    String firstKey = parsed.keySet().iterator().next();
    Object storedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(storedValue instanceof Float);
    assertEquals("0.1", String.valueOf(storedValue));
}
Example 7
Source File: TestPigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Converts a Pig schema string with {@code pigSchemaConverter} and compares
 * the result with the parsed expected message type; then filters the
 * converted schema against the same Pig schema and checks that its string
 * form is unchanged.
 *
 * @param pigSchemaString Pig schema to convert
 * @param schemaString    expected message type, in the syntax accepted by
 *                        {@code MessageTypeParser.parseMessageType}
 * @throws Exception propagated from parsing or conversion
 */
private void testConversion(String pigSchemaString, String schemaString) throws Exception {
    Schema pigSchema = Utils.getSchemaFromString(pigSchemaString);
    MessageType converted = pigSchemaConverter.convert(pigSchema);
    MessageType expected = MessageTypeParser.parseMessageType(schemaString);

    // Shared prefix of both assertion messages
    final String label = "converting "+pigSchemaString+" to "+schemaString;
    assertEquals(label, expected, converted);

    MessageType filtered = pigSchemaConverter.filter(converted, pigSchema, null);
    assertEquals(label+" and filtering", converted.toString(), filtered.toString());
}
Example 8
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for a {@code limit} of a relation equals the
 * schema declared in the load statement.
 *
 * @throws Throwable propagated from the Pig server calls or schema parsing
 */
@Test
public void testDescribeLimit() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = limit a 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected =
            Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");
    assertEquals(expected, actual);
}
Example 9
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for a {@code filter} of a relation equals the
 * schema declared in the load statement.
 *
 * @throws Throwable propagated from the Pig server calls or schema parsing
 */
@Test
public void testDescribeFilter() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = filter a by field1 > 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected =
            Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");
    assertEquals(expected, actual);
}
Example 10
Source File: PigSerializationEventConverterTest.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Parses a Pig schema string and wraps it in a {@code ResourceSchema}.
 * Any exception raised while doing so is rethrown unchecked with the
 * original as its cause.
 *
 * @param schema Pig schema string to parse
 * @return the resource schema built from the parsed schema
 */
private ResourceSchema createSchema(String schema) {
    try {
        return new ResourceSchema(Utils.getSchemaFromString(schema));
    }
    catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
Example 11
Source File: DBStorage.java From spork with Apache License 2.0 | 5 votes |
/** * Initialise the database connection and prepared statement here. */ @SuppressWarnings("unchecked") @Override public void prepareToWrite(RecordWriter writer) throws IOException { ps = null; con = null; if (insertQuery == null) { throw new IOException("SQL Insert command not specified"); } try { if (user == null || pass == null) { con = DriverManager.getConnection(jdbcURL); } else { con = DriverManager.getConnection(jdbcURL, user, pass); } con.setAutoCommit(false); ps = con.prepareStatement(insertQuery); } catch (SQLException e) { log.error("Unable to connect to JDBC @" + jdbcURL); throw new IOException("JDBC Error", e); } count = 0; // Try to get the schema from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema != null) { // Parse the schema from the string stored in the properties object. schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); } }
Example 12
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for a {@code cross} of two relations lists
 * the fields of both inputs, each prefixed with its relation alias, and that
 * the untyped {@code field4} is reported as {@code bytearray}.
 *
 * @throws Throwable propagated from the Pig server calls or schema parsing
 */
@Test
public void testDescribeCross() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cross a, b;");

    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString(
            "a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");
    assertEquals(expected, actual);
}
Example 13
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for a {@code cogroup} of two relations
 * consists of the {@code group} key followed by one bag per input.
 *
 * @throws Throwable propagated from the Pig server calls or schema parsing
 */
@Test
public void testDescribeCogroup() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cogroup a by field1, b by field4;");

    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString(
            "group:int,a:{(field1:int,field2:float,field3:chararray)},b:{(field4:bytearray,field5:double,field6:chararray)}");
    assertEquals(expected, actual);
}
Example 14
Source File: TestUnionOnSchema.java From spork with Apache License 2.0 | 5 votes |
/**
 * Test UNION ONSCHEMA where a common column carries an additional
 * 'namespace' part in its name in both of the inputs.
 *
 * @throws IOException     propagated from the Pig server calls
 * @throws ParserException if the expected schema string cannot be parsed
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    // NOTE(review): f2 iterates over cg1 although cg2 is defined just above
    // it - confirm whether that is intentional before changing the script.
    final String script =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + " l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); "
        + " cg1 = cogroup l1 by i, l2 by i; "
        + " f1 = foreach cg1 generate group as gkey, flatten(l1), flatten(l2); "
        + " cg2 = cogroup l2 by i, l1 by i; "
        + " f2 = foreach cg1 generate group as gkey, flatten(l2), flatten(l1); "
        + "u = union onschema f1, f2; " ;
    Util.registerMultiLineQuery(server, script);

    Schema actualSchema = server.dumpSchema("u");
    Schema wantedSchema = Utils.getSchemaFromString(
            "gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    assertEquals("Checking expected schema",actualSchema, wantedSchema);

    Iterator<Tuple> actualRows = server.openIterator("u");
    String[] wantedRowStrings = {
        "(1,1,2,1,'2')",
        "(5,5,3,5,'3')",
        "(1,1,2,1,'2')",
        "(5,5,3,5,'3')",
    };
    List<Tuple> wantedRows = Util.getTuplesFromConstantTupleStrings(wantedRowStrings);
    Util.checkQueryOutputsAfterSort(actualRows, wantedRows);
}
Example 15
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for an {@code order by * desc} of a relation
 * equals the schema declared in the load statement.
 *
 * @throws Throwable propagated from the Pig server calls or schema parsing
 */
@Test
public void testDescribeSort() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = order a by * desc;");

    Schema actual = server.dumpSchema("b");
    Schema expected =
            Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");
    assertEquals(expected, actual);
}
Example 16
Source File: TestTupleRecordConsumer.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Parses a Pig schema string and converts it with a fresh
 * {@code PigSchemaConverter}.
 *
 * @param pigSchemaString Pig schema to parse
 * @return the message type produced by the converter
 * @throws ParserException if the schema string cannot be parsed
 */
private MessageType getMessageType(String pigSchemaString) throws ParserException {
    final Schema parsed = Utils.getSchemaFromString(pigSchemaString);
    PigSchemaConverter converter = new PigSchemaConverter();
    return converter.convert(parsed);
}
Example 17
Source File: GroovyEvalFunc.java From spork with Apache License 2.0 | 4 votes |
public GroovyEvalFunc(String path, String namespace, String methodName, Object target) throws IOException { String fqmn = "".equals(namespace) ? methodName : namespace + ScriptEngine.NAMESPACE_SEPARATOR + methodName; Class c = scriptClasses.get(path); if (null == c) { try { c = GroovyScriptEngine.getEngine().loadScriptByName(new File(path).toURI().toString()); } catch (ScriptException se) { throw new IOException(se); } catch (ResourceException re) { throw new IOException(re); } } scriptClasses.put(path, c); Method[] methods = c.getMethods(); int matches = 0; for (Method m : methods) { if (m.getName().equals(methodName)) { this.method = m; matches++; } } if (null == this.method) { throw new IOException("Method " + methodName + " was not found in '" + path + "'"); } if (matches > 1) { throw new IOException("There are " + matches + " methods with name '" + methodName + "', please make sure method names are unique within the Groovy class."); } // // Extract schema // Annotation[] annotations = this.method.getAnnotations(); for (Annotation annotation : annotations) { if (annotation.annotationType().equals(OutputSchemaFunction.class)) { this.schemaFunction = new GroovyEvalFuncObject(path, namespace, ((OutputSchemaFunction) annotation).value()); break; } else if (annotation.annotationType().equals(OutputSchema.class)) { this.schema = Utils.getSchemaFromString(((OutputSchema) annotation).value()); break; } } // // For static method, invocation target is null, for non // static method, create/set invocation target unless passed // to the constructor // if (!Modifier.isStatic(this.method.getModifiers())) { if (null != target) { this.invocationTarget = target; } else { try { this.invocationTarget = c.newInstance(); } catch (InstantiationException ie) { throw new IOException(ie); } catch (IllegalAccessException iae) { throw new IOException(iae); } } } }
Example 18
Source File: PigSchemaSaveTest.java From elasticsearch-hadoop with Apache License 2.0 | 4 votes |
/**
 * Serializes a parsed Pig schema to Base64 and back, then checks that the
 * string form of the restored schema matches the original's.
 *
 * @throws Exception propagated from schema parsing or (de)serialization
 */
@Test
public void testSchemaSerializationPlusBase64() throws Exception {
    Schema original =
            Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}");

    String encoded = IOUtils.serializeToBase64(original);
    Schema restored = IOUtils.deserializeFromBase64(encoded);

    assertEquals(original.toString(), restored.toString());
}
Example 19
Source File: TestPlanGeneration.java From spork with Apache License 2.0 | 4 votes |
/**
 * Creates a loader whose schema is parsed from the given Pig schema string.
 *
 * @param schemaString Pig schema string to parse
 * @throws ParserException if the string cannot be parsed
 */
public SchemaLoader(String schemaString) throws ParserException {
    this.schema = Utils.getSchemaFromString(schemaString);
}
Example 20
Source File: EvalFunc.java From spork with Apache License 2.0 | 3 votes |
/**
 * Reports the schema of this UDF's output; Pig uses it for error checking,
 * optimization, and planning. The input schema is supplied by the caller.
 * <p>
 * This default implementation reads the {@link OutputSchema} annotation on
 * the concrete class. When the annotation is absent it returns
 * <code>null</code>, meaning no output schema is known.
 *
 * @param input Schema of the input
 * @return Schema of the output, or <code>null</code> when none is declared
 */
public Schema outputSchema(Schema input) {
    OutputSchema declared = this.getClass().getAnnotation(OutputSchema.class);
    if (declared == null) {
        return null;
    }
    try {
        return Utils.getSchemaFromString(declared.value());
    } catch (ParserException e) {
        // A malformed annotation value is rethrown unchecked with its cause.
        throw new RuntimeException(e);
    }
}