Java Code Examples for org.apache.pig.PigServer#explain()
The following examples show how to use org.apache.pig.PigServer#explain().
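Before the individual test cases, here is a minimal, self-contained sketch of the pattern most of them share: build a small script with registerQuery(), then pass an alias and a PrintStream to explain() and inspect the printed plan. The file name 'input.txt' and the aliases a/b/c are illustrative placeholders, not taken from any particular example; as in the TestUDFContext example below, the input file does not need to exist, because explain() only compiles and prints the plan.

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;

public class ExplainSketch {
    public static void main(String[] args) throws Exception {
        // Run Pig in local mode; the tests below use ExecType.LOCAL or a test cluster.
        PigServer pig = new PigServer(ExecType.LOCAL);

        // 'input.txt' and the aliases are placeholders for illustration only.
        pig.registerQuery("a = load 'input.txt' as (name:chararray, age:int);");
        pig.registerQuery("b = group a by name;");
        pig.registerQuery("c = foreach b generate group, COUNT(a);");

        // Capture the explain output in memory instead of printing to System.out,
        // so the generated plans can be inspected as a string.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);
        pig.explain("c", ps);

        // Several tests below grep the output like this to see whether a combiner was planned.
        boolean combinerPlanned = baos.toString().matches("(?si).*combine plan.*");
        System.out.println("Combine plan present: " + combinerPlanned);
    }
}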
Example 1
Source File: TestForEachStar.java From spork with Apache License 2.0

@Test
public void testForeachStarSchemaUnkown() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INPUT_FILE + "' ;"
        + "f1 = foreach l1 generate * ;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("f1", System.out);
    Iterator<Tuple> it = pig.openIterator("f1");

    Tuple expectedResCharArray = (Tuple) Util.getPigConstant("('one','two')");
    Tuple expectedRes = TupleFactory.getInstance().newTuple();
    for (Object field : expectedResCharArray.getAll()) {
        expectedRes.append(new DataByteArray(field.toString()));
    }
    assertTrue("has output", it.hasNext());
    assertEquals(expectedRes, it.next());
}
Example 2
Source File: Util.java From spork with Apache License 2.0

/**
 * Check if query plan for alias argument produces exception with expected
 * error message in expectedErr argument.
 * @param query
 * @param alias
 * @param expectedErr
 * @throws IOException
 */
static public void checkExceptionMessage(String query, String alias, String expectedErr)
        throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    boolean foundEx = false;
    try {
        Util.registerMultiLineQuery(pig, query);
        pig.explain(alias, System.out);
    } catch (FrontendException e) {
        foundEx = true;
        checkMessageInException(e, expectedErr);
    }
    if (!foundEx)
        fail("No exception thrown. Exception is expected.");
}
Example 3
Source File: BoundScript.java From spork with Apache License 2.0

/**
 * Explain this pipeline. Results will be printed to stdout.
 * @throws IOException if explain fails.
 */
public void explain() throws IOException {
    if (queries.isEmpty()) {
        LOG.info("No bound query to explain");
        return;
    }
    PigServer pigServer = new PigServer(scriptContext.getPigContext(), false);
    registerQuery(pigServer, queries.get(0));
    pigServer.explain(null, System.out);
}
Example 4
Source File: TestQueryParser.java From spork with Apache License 2.0

@Test // PIG-2083
public void testNullInBinCondNoSpace() throws IOException {
    String query =
        "a = load '1.txt' as (a0, a1);"
        + "b = foreach a generate (a0==0?null:2);";
    // no space around the null keyword, so the lexer doesn't emit a NULL token
    PigServer pig = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(pig, query);
    pig.explain("b", System.out);
}
Example 5
Source File: TestUnionOnSchema.java From spork with Apache License 2.0

/**
 * Test UNION ONSCHEMA on two inputs, one input with additional columns
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaAdditionalColumn() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
        + "l2 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + " (i : long, c : chararray, j : int "
        + ", b : bag { t : tuple (c1 : int, c2 : chararray)}"
        + ", t : tuple (tc1 : int, tc2 : chararray) );"
        + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, x : long);"
        + "u = union onschema l1, l2, l3;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("u", System.out);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1L,2,null,null,null,null)",
                "(5L,3,null,null,null,null)",
                "(1L,2,'abc',{(1,'a'),(1,'b')},(1,'c'),null)",
                "(5L,3,'def',{(2,'a'),(2,'b')},(2,'c'),null)",
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
Example 6
Source File: TestUnionOnSchema.java From spork with Apache License 2.0

/**
 * Test UNION ONSCHEMA on 3 inputs
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchema3Inputs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : double, x : int); "
        + "l3 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + " (i : long, c : chararray, j : int "
        + ", b : bag { t : tuple (c1 : int, c2 : chararray)} ); "
        + "u = union onschema l1, l2, l3;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("u", System.out);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1.0,2,null,null,null)",
                "(5.0,3,null,null,null)",
                "(1.0,null,2,null,null)",
                "(5.0,null,3,null,null)",
                "(1.0,2,null,'abc',{(1,'a'),(1,'b')})",
                "(5.0,3,null,'def',{(2,'a'),(2,'b')})",
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
Example 7
Source File: TestUnionOnSchema.java From spork with Apache License 2.0

/**
 * Test UNION ONSCHEMA with bytearray type
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaByteArrayConversions() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + " (i : bytearray, x : bytearray, j : bytearray "
        + ", b : bytearray); "
        + "l2 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + " (i : long, c : chararray, j : int "
        + ", b : bag { t : tuple (c1 : int, c2 : chararray)} ); "
        + "u = union onSchema l1, l2;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("u", System.out);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1L,null,2,{(1,'a'),(1,'b')},'abc')",
                "(1L,'abc',2,{(1,'a'),(1,'b')},null)",
                "(5L,null,3,{(2,'a'),(2,'b')},'def')",
                "(5L,'def',3,{(2,'a'),(2,'b')},null)",
            });

    // update expectedRes to use bytearray instead of chararray in 2nd field
    for (Tuple t : expectedRes) {
        if (t.get(1) != null) {
            t.set(1, new DataByteArray(t.get(1).toString()));
        }
    }
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
Example 8
Source File: TestCombiner.java From spork with Apache License 2.0

private void checkCombinerUsed(PigServer pigServer, String string, boolean combineExpected)
        throws IOException {
    // make sure there is a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    boolean combinerFound = baos.toString().matches("(?si).*combine plan.*");
    System.out.println(baos.toString());
    assertEquals("is combiner present as expected", combineExpected, combinerFound);
}
Example 9
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testJiraPig1030() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // has a non-algebraic UDF that have multiple inputs
    // (one of them is distinct).
    String input[] = {
            "pig1\t18\t2.1",
            "pig2\t24\t3.3",
            "pig5\t45\t2.4",
            "pig1\t18\t2.1",
            "pig1\t19\t2.1",
            "pig2\t24\t4.5",
            "pig1\t20\t3.1" };

    try {
        Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);

        PigServer pigServer = new PigServer(cluster.getExecType(), properties);
        pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
        pigServer.registerQuery("b = group a all;");
        pigServer.registerQuery("c = foreach b {"
                + " d = distinct a.age;"
                + " generate group, " + JiraPig1030.class.getName() + "(d, 0);};");

        // make sure there isn't a combine plan in the explain output
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);
        pigServer.explain("c", ps);
        assertFalse(baos.toString().matches("(?si).*combine plan.*"));

        pigServer.shutdown();
    } finally {
        Util.deleteFile(cluster, "forEachNoCombinerInput.txt");
    }
}
Example 10
Source File: TestUDFContext.java From spork with Apache License 2.0

/**
 * Test that UDFContext is reset each time the plan is regenerated
 * @throws Exception
 */
@Test
public void testUDFContextReset() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.registerQuery(" l = load 'file' as (a :int, b : int, c : int);");
    pig.registerQuery(" f = foreach l generate a, b;");
    pig.explain("f", System.out);

    Properties props = UDFContext.getUDFContext().getUDFProperties(PigStorage.class);
    // required fields property should be set because f results does not
    // require the third column c, so properties should not be null
    assertTrue("properties in udf context for load should not be empty: " + props,
            props.keySet().size() > 0);

    // the new statement for alias f below will require all columns,
    // so this time required fields property for loader should not be set
    pig.registerQuery(" f = foreach l generate a, b, c;");
    pig.explain("f", System.out);

    props = UDFContext.getUDFContext().getUDFProperties(PigStorage.class);
    assertTrue("properties in udf context for load should be empty: " + props,
            props.keySet().size() == 0);
}
Example 11
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testMultiCombinerUse() throws Exception {
    // test the scenario where the combiner is called multiple
    // times - this can happen when the output of the map > io.sort.mb
    // let's set the io.sort.mb to 1MB and > 1 MB map data.
    String[] input = new String[500 * 1024];
    for (int i = 0; i < input.length; i++) {
        if (i % 2 == 0) {
            input[i] = Integer.toString(1);
        } else {
            input[i] = Integer.toString(0);
        }
    }
    Util.createInputFile(cluster, "MultiCombinerUseInput.txt", input);
    String oldValue = properties.getProperty("io.sort.mb");
    properties.setProperty("io.sort.mb", "1");

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.getPigContext().getProperties()
            .setProperty(MRConfiguration.CHILD_JAVA_OPTS, "-Xmx1024m");
    pigServer.registerQuery("a = load 'MultiCombinerUseInput.txt' as (x:int);");
    pigServer.registerQuery("b = group a all;");
    pigServer.registerQuery("c = foreach b generate COUNT(a), SUM(a.$0), "
            + "MIN(a.$0), MAX(a.$0), AVG(a.$0), ((double)SUM(a.$0))/COUNT(a.$0),"
            + " COUNT(a.$0) + SUM(a.$0) + MAX(a.$0);");

    // make sure there is a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertTrue(baos.toString().matches("(?si).*combine plan.*"));

    Iterator<Tuple> it = pigServer.openIterator("c");
    Tuple t = it.next();
    assertEquals(512000L, t.get(0));
    assertEquals(256000L, t.get(1));
    assertEquals(0, t.get(2));
    assertEquals(1, t.get(3));
    assertEquals(0.5, t.get(4));
    assertEquals(0.5, t.get(5));
    assertEquals(512000L + 256000L + 1, t.get(6));
    assertFalse(it.hasNext());

    Util.deleteFile(cluster, "MultiCombinerUseInput.txt");

    // Reset io.sort.mb to the original value before exit
    if (oldValue == null) {
        properties.remove("io.sort.mb");
    } else {
        properties.setProperty("io.sort.mb", oldValue);
    }
    pigServer.shutdown();
}
Example 12
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testDistinctAggs1() throws Exception {
    // test the use of combiner for distinct aggs:
    String input[] = {
            "pig1\t18\t2.1",
            "pig2\t24\t3.3",
            "pig5\t45\t2.4",
            "pig1\t18\t2.1",
            "pig1\t19\t2.1",
            "pig2\t24\t4.5",
            "pig1\t20\t3.1" };
    Util.createInputFile(cluster, "distinctAggs1Input.txt", input);

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b {"
            + " x = distinct a.age;"
            + " y = distinct a.gpa;"
            + " z = distinct a;"
            + " generate group, COUNT(x), SUM(x.age), SUM(y.gpa), SUM(a.age), "
            + " SUM(a.gpa), COUNT(z.age), COUNT(z), SUM(z.age);};");

    // make sure there is a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertTrue(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", 3L, 57L, 5.2, 75L, 9.4, 3L, 3L, 57L });
    results.put("pig2", new Object[] { "pig2", 1L, 24L, 7.8, 48L, 7.8, 2L, 2L, 48L });
    results.put("pig5", new Object[] { "pig5", 1L, 45L, 2.4, 45L, 2.4, 1L, 1L, 45L });

    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        int i = 0;
        for (Object field : fields) {
            assertEquals(expected[i++], field);
        }
    }
    Util.deleteFile(cluster, "distinctAggs1Input.txt");
    pigServer.shutdown();
}
Example 13
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testDistinctNoCombiner() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // is a distinct() as the leaf
    String input[] = {
            "pig1\t18\t2.1",
            "pig2\t24\t3.3",
            "pig5\t45\t2.4",
            "pig1\t18\t2.1",
            "pig1\t19\t2.1",
            "pig2\t24\t4.5",
            "pig1\t20\t3.1" };
    Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input);

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b {"
            + " z = distinct a;"
            + " generate group, z, SUM(a.age), SUM(a.gpa);};");

    // make sure there isn't a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertFalse(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 });
    results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 });
    results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 });

    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        int i = 0;
        for (Object field : fields) {
            if (i == 1) {
                // ignore the second field which is a bag
                // for comparison here
                continue;
            }
            assertEquals(expected[i++], field);
        }
    }
    Util.deleteFile(cluster, "distinctNoCombinerInput.txt");
    pigServer.shutdown();
}
Example 14
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testForEachNoCombiner() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // has a foreach in the plan without a distinct agg
    String input[] = {
            "pig1\t18\t2.1",
            "pig2\t24\t3.3",
            "pig5\t45\t2.4",
            "pig1\t18\t2.1",
            "pig1\t19\t2.1",
            "pig2\t24\t4.5",
            "pig1\t20\t3.1" };
    Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b {"
            + " z = a.age;"
            + " generate group, z, SUM(a.age), SUM(a.gpa);};");

    // make sure there isn't a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertFalse(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 });
    results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 });
    results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 });

    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        int i = 0;
        for (Object field : fields) {
            if (i == 1) {
                // ignore the second field which is a bag
                // for comparison here
                continue;
            }
            assertEquals(expected[i++], field);
        }
    }
    Util.deleteFile(cluster, "forEachNoCombinerInput.txt");
    pigServer.shutdown();
}
Example 15
Source File: TestCombiner.java From spork with Apache License 2.0

@Test
public void testJiraPig746() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // has a foreach in the plan without a distinct agg
    String input[] = {
            "pig1\t18\t2.1",
            "pig2\t24\t3.3",
            "pig5\t45\t2.4",
            "pig1\t18\t2.1",
            "pig1\t19\t2.1",
            "pig2\t24\t4.5",
            "pig1\t20\t3.1" };

    String expected[] = {
            "(pig1,75,{(pig1,18,2.1),(pig1,18,2.1),(pig1,19,2.1),(pig1,20,3.1)})",
            "(pig2,48,{(pig2,24,3.3),(pig2,24,4.5)})",
            "(pig5,45,{(pig5,45,2.4)})" };

    try {
        Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);

        PigServer pigServer = new PigServer(cluster.getExecType(), properties);
        pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
        pigServer.registerQuery("b = group a by name;");
        pigServer.registerQuery("c = foreach b generate group, SUM(a.age), a;");

        // make sure there isn't a combine plan in the explain output
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);
        pigServer.explain("c", ps);
        assertFalse(baos.toString().matches("(?si).*combine plan.*"));

        Iterator<Tuple> it = pigServer.openIterator("c");
        Util.checkQueryOutputsAfterSortRecursive(it, expected,
                "group:chararray,age:long,b:{t:(name:chararray,age:int,gpa:double)}");

        pigServer.shutdown();
    } finally {
        Util.deleteFile(cluster, "forEachNoCombinerInput.txt");
    }
}
Example 16
Source File: TestPigServer.java From spork with Apache License 2.0

@Test
public void testExplainXmlComplex() throws Throwable {
    // TODO: Explain XML output is not supported in non-MR mode. Remove the
    // following condition once it's implemented in Tez.
    if (cluster.getExecType() != ExecType.MAPREDUCE) {
        return;
    }

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("a = load 'a' as (site: chararray, count: int, itemCounts: bag { itemCountsTuple: tuple (type: chararray, typeCount: int, f: float, m: map[]) } ) ;");
    pig.registerQuery("b = foreach a generate site, count, FLATTEN(itemCounts);");
    pig.registerQuery("c = group b by site;");
    pig.registerQuery("d = foreach c generate FLATTEN($1);");
    pig.registerQuery("e = group d by $2;");

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pig.explain("e", "xml", true, false, ps, ps, null, null);

    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(bais);

    // Verify Logical and Physical Plans aren't supported.
    NodeList logicalPlan = doc.getElementsByTagName("logicalPlan");
    assertEquals(1, logicalPlan.getLength());
    assertTrue(logicalPlan.item(0).getTextContent().contains("Not Supported"));
    NodeList physicalPlan = doc.getElementsByTagName("physicalPlan");
    assertEquals(1, physicalPlan.getLength());
    assertTrue(physicalPlan.item(0).getTextContent().contains("Not Supported"));

    // Verify we have two loads and one is temporary
    NodeList loads = doc.getElementsByTagName("POLoad");
    assertEquals(2, loads.getLength());

    boolean sawTempLoad = false;
    boolean sawNonTempLoad = false;
    for (int i = 0; i < loads.getLength(); i++) {
        Boolean isTempLoad = null;
        boolean hasAlias = false;

        Node poLoad = loads.item(i);
        NodeList children = poLoad.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            if (child.getNodeName().equals("alias")) {
                hasAlias = true;
            }
            if (child.getNodeName().equals("isTmpLoad")) {
                if (child.getTextContent().equals("false")) {
                    isTempLoad = false;
                } else if (child.getTextContent().equals("true")) {
                    isTempLoad = true;
                }
            }
        }

        if (isTempLoad == null) {
            fail("POLoad elements should have isTmpLoad child node.");
        } else if (isTempLoad && hasAlias) {
            fail("Temp loads should not have aliases");
        } else if (!isTempLoad && !hasAlias) {
            fail("Non temporary loads should be associated with alias.");
        }

        sawTempLoad = sawTempLoad || isTempLoad;
        sawNonTempLoad = sawNonTempLoad || !isTempLoad;
    }
    assertTrue(sawTempLoad && sawNonTempLoad);
}