Java Code Examples for org.apache.pig.PigServer#shutdown()
The following examples show how to use
org.apache.pig.PigServer#shutdown().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestPigServerWithMacros.java From spork with Apache License 2.0 | 6 votes |
/**
 * Defines the {@code row_count} macro inline via {@code registerQuery}, applies it to a
 * two-row mock-storage relation and checks that the reported count is 2.
 *
 * @throws Throwable if the server cannot be created or a query fails
 */
@Test
public void testInlineMacro() throws Throwable {
    PigServer pig = new PigServer(ExecType.LOCAL);
    try {
        Storage.Data data = resetData(pig);
        data.set("some_path", "(l:chararray)", tuple("first row"), tuple("second row"));

        pig.registerQuery("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
        pig.registerQuery("a = load 'some_path' USING mock.Storage();");
        pig.registerQuery("b = row_count(a);");

        Iterator<Tuple> iter = pig.openIterator("b");
        assertEquals(2L, ((Long) iter.next().get(0)).longValue());
    } finally {
        // Shut down even when a query or assertion fails so the server is not left running.
        pig.shutdown();
    }
}
Example 2
Source File: Sty.java From validatar with Apache License 2.0 | 5 votes |
@Override public void execute(Query query) { String queryName = query.name; String queryValue = query.value; Map<String, String> queryMetadata = query.getMetadata(); String execType = Query.getKey(queryMetadata, METADATA_EXEC_TYPE_KEY).orElse(defaultExecType); String alias = Query.getKey(queryMetadata, METADATA_ALIAS_KEY).orElse(defaultOutputAlias); log.info("Running {} for alias {}: {}", queryName, alias, queryValue); try { PigServer server = getPigServer(execType); server.registerScript(new ByteArrayInputStream(queryValue.getBytes())); Iterator<Tuple> queryResults = server.openIterator(alias); Result result = query.createResults(); // dumpSchema will also, unfortunately, print the schema to stdout. List<FieldDetail> metadata = getFieldDetails(server.dumpSchema(alias)); populateColumns(metadata, result); while (queryResults.hasNext()) { populateRow(queryResults.next(), metadata, result); } server.shutdown(); } catch (IOException ioe) { log.error("Problem with Pig query: {}\n{}", queryValue, ioe); query.setFailure(ioe.toString()); } catch (Exception e) { log.error("Error occurred while processing Pig query: {}\n{}", queryValue, e); query.setFailure(e.toString()); } }
Example 3
Source File: TestCombiner.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds the MR plan for a query that applies {@code COUNT} to the result of
 * {@code Distinct} and asserts that the first root operator has an empty combine plan.
 *
 * @throws Exception if the plan cannot be built
 */
@Test
public void testSuccessiveUserFuncs1() throws Exception {
    String query = "a = load 'students.txt' as (c1,c2,c3,c4); "
            + "c = group a by c2; "
            + "f = foreach c generate COUNT(org.apache.pig.builtin.Distinct($1.$2)); "
            + "store f into 'out';";
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    try {
        PigContext pc = pigServer.getPigContext();
        assertTrue(Util.buildMRPlan(Util.buildPp(pigServer, query), pc)
                .getRoots().get(0).combinePlan.isEmpty());
    } finally {
        // Release the server even if plan construction or the assertion fails.
        pigServer.shutdown();
    }
}
Example 4
Source File: TestCombiner.java From spork with Apache License 2.0 | 5 votes |
/**
 * Same check as {@code testSuccessiveUserFuncs1}, but the {@code Distinct} call is wrapped
 * in the {@code JiraPig1030} UDF: the first root operator of the MR plan must have an
 * empty combine plan.
 *
 * @throws Exception if the plan cannot be built
 */
@Test
public void testSuccessiveUserFuncs2() throws Exception {
    String dummyUDF = JiraPig1030.class.getName();
    String query = "a = load 'students.txt' as (c1,c2,c3,c4); "
            + "c = group a by c2; "
            + "f = foreach c generate COUNT(" + dummyUDF
            + "(org.apache.pig.builtin.Distinct($1.$2)," + dummyUDF + "())); "
            + "store f into 'out';";
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    try {
        PigContext pc = pigServer.getPigContext();
        assertTrue(Util.buildMRPlan(Util.buildPp(pigServer, query), pc)
                .getRoots().get(0).combinePlan.isEmpty());
    } finally {
        // Release the server even if plan construction or the assertion fails.
        pigServer.shutdown();
    }
}
Example 5
Source File: TestCombiner.java From spork with Apache License 2.0 | 5 votes |
@Test public void testOnCluster() throws Exception { // run the test on cluster PigServer pigServer = new PigServer(cluster.getExecType(), properties); String inputFileName = runTest(pigServer); Util.deleteFile(cluster, inputFileName); pigServer.shutdown(); }
Example 6
Source File: TestCombiner.java From spork with Apache License 2.0 | 5 votes |
@Test public void testGroupByLimit() throws Exception { // test use of combiner when group elements are accessed in the foreach String input[] = { "ABC 1", "ABC 2", "DEF 1", "XYZ 1", "XYZ 2", "XYZ 3", }; Util.createInputFile(cluster, "testGroupLimit.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'testGroupLimit.txt' using PigStorage(' ') " + "as (str:chararray, num1:int) ;"); pigServer.registerQuery("b = group a by str;"); pigServer.registerQuery("c = foreach b generate group, COUNT(a.num1) ; "); // check if combiner is present pigServer.registerQuery("d = limit c 2 ; "); checkCombinerUsed(pigServer, "d", true); List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings( new String[] { "('ABC',2L)", "('DEF',1L)", }); Iterator<Tuple> it = pigServer.openIterator("d"); Util.checkQueryOutputsAfterSort(it, expectedRes); pigServer.shutdown(); }
Example 7
Source File: TestCombiner.java From spork with Apache License 2.0 | 5 votes |
@Test public void testJiraPig1030() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // has a non-algebraic UDF that have multiple inputs // (one of them is distinct). String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; try { Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a all;"); pigServer.registerQuery("c = foreach b {" + " d = distinct a.age;" + " generate group, " + JiraPig1030.class.getName() + "(d, 0);};"); // make sure there isn't a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); pigServer.shutdown(); } finally { Util.deleteFile(cluster, "forEachNoCombinerInput.txt"); } }
Example 8
Source File: TestPigServerWithMacros.java From spork with Apache License 2.0 | 5 votes |
@Test public void testRegisterRemoteMacro() throws Throwable { PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties()); String macroName = "util.pig"; File macroFile = File.createTempFile("tmp", ""); PrintWriter pw = new PrintWriter(new FileWriter(macroFile)); pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };"); pw.close(); FileSystem fs = cluster.getFileSystem(); fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName)); // find the absolute path for the directory so that it does not // depend on configuration String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString(); Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[]{"1", "2"}); pig.registerQuery("import '" + absPath + "';"); pig.registerQuery("a = load 'testRegisterRemoteMacro_input';"); pig.registerQuery("b = row_count(a);"); Iterator<Tuple> iter = pig.openIterator("b"); assertEquals(2L, ((Long)iter.next().get(0)).longValue()); pig.shutdown(); }
Example 9
Source File: TestPigServerWithMacros.java From spork with Apache License 2.0 | 5 votes |
/**
 * Packages a macro definition into a temporary jar, registers the jar, imports the macro
 * by its in-jar path and checks that {@code row_count_in_jar} counts the five input rows.
 *
 * @throws Throwable if jar creation or a query fails
 */
@Test
public void testRegisterResourceMacro() throws Throwable {
    PigServer pig = new PigServer(ExecType.LOCAL);
    try {
        String macrosFile = "test/pig/macros.pig";
        File macrosJarFile = File.createTempFile("macros", ".jar");

        System.out.println("Creating macros jar " + macrosJarFile);

        Manifest manifest = new Manifest();
        manifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");

        JarOutputStream jarStream = new JarOutputStream(new FileOutputStream(macrosJarFile), manifest);
        try {
            JarEntry jarEntry = new JarEntry(macrosFile);
            jarEntry.setTime(System.currentTimeMillis());
            jarStream.putNextEntry(jarEntry);

            PrintWriter pw = new PrintWriter(jarStream);
            pw.println("DEFINE row_count_in_jar(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
            pw.close();
        } finally {
            // Also reached when writing the entry fails, so the jar file handle is released.
            jarStream.close();
        }

        Storage.Data data = resetData(pig);
        data.set("some_path", "(l:int)",
                tuple(tuple("1")), tuple(tuple("2")), tuple(tuple("3")),
                tuple(tuple("10")), tuple(tuple("11")));

        System.out.println("Registering macros jar " + macrosJarFile);
        pig.registerJar(macrosJarFile.toString());
        pig.registerQuery("import '" + macrosFile + "';");
        pig.registerQuery("a = load 'some_path' USING mock.Storage();");
        pig.registerQuery("b = row_count_in_jar(a);");

        Iterator<Tuple> iter = pig.openIterator("b");
        // assertEquals reports the actual count on failure, unlike assertTrue(x == 5).
        assertEquals(5L, ((Long) iter.next().get(0)).longValue());
    } finally {
        // Shut down even when setup, a query or the assertion fails.
        pig.shutdown();
    }
}
Example 10
Source File: TestRegisteredJarVisibility.java From spork with Apache License 2.0 | 5 votes |
public void testRegisteredJarVisibility(PigServer pigServer, String inputPath) throws IOException { String query = "register " + jarFile.getAbsolutePath() + ";\n" + "a = load '" + Util.generateURI(inputPath, pigServer.getPigContext()) + "' using org.apache.pig.test.RegisteredJarVisibilityLoader();\n" // register again to test classloader consistency + "register " + jarFile.getAbsolutePath() + ";\n" + "b = load 'non_existent' " + "using org.apache.pig.test.RegisteredJarVisibilityLoader();"; LOG.info("Running pig script:\n" + query); pigServer.registerScript(new ByteArrayInputStream(query.getBytes())); pigServer.openIterator("a"); pigServer.shutdown(); }
Example 11
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMultiCombinerUse() throws Exception { // test the scenario where the combiner is called multiple // times - this can happen when the output of the map > io.sort.mb // let's set the io.sort.mb to 1MB and > 1 MB map data. String[] input = new String[500 * 1024]; for (int i = 0; i < input.length; i++) { if (i % 2 == 0) { input[i] = Integer.toString(1); } else { input[i] = Integer.toString(0); } } Util.createInputFile(cluster, "MultiCombinerUseInput.txt", input); String oldValue = properties.getProperty("io.sort.mb"); properties.setProperty("io.sort.mb", "1"); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.getPigContext().getProperties().setProperty(MRConfiguration.CHILD_JAVA_OPTS, "-Xmx1024m"); pigServer.registerQuery("a = load 'MultiCombinerUseInput.txt' as (x:int);"); pigServer.registerQuery("b = group a all;"); pigServer.registerQuery("c = foreach b generate COUNT(a), SUM(a.$0), " + "MIN(a.$0), MAX(a.$0), AVG(a.$0), ((double)SUM(a.$0))/COUNT(a.$0)," + " COUNT(a.$0) + SUM(a.$0) + MAX(a.$0);"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertTrue(baos.toString().matches("(?si).*combine plan.*")); Iterator<Tuple> it = pigServer.openIterator("c"); Tuple t = it.next(); assertEquals(512000L, t.get(0)); assertEquals(256000L, t.get(1)); assertEquals(0, t.get(2)); assertEquals(1, t.get(3)); assertEquals(0.5, t.get(4)); assertEquals(0.5, t.get(5)); assertEquals(512000L + 256000L + 1, t.get(6)); assertFalse(it.hasNext()); Util.deleteFile(cluster, "MultiCombinerUseInput.txt"); // Reset io.sort.mb to the original value before exit if (oldValue == null) { properties.remove("io.sort.mb"); } else { properties.setProperty("io.sort.mb", oldValue); } pigServer.shutdown(); }
Example 12
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testDistinctAggs1() throws Exception { // test the use of combiner for distinct aggs: String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "distinctAggs1Input.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " x = distinct a.age;" + " y = distinct a.gpa;" + " z = distinct a;" + " generate group, COUNT(x), SUM(x.age), SUM(y.gpa), SUM(a.age), " + " SUM(a.gpa), COUNT(z.age), COUNT(z), SUM(z.age);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertTrue(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", 3L, 57L, 5.2, 75L, 9.4, 3L, 3L, 57L }); results.put("pig2", new Object[] { "pig2", 1L, 24L, 7.8, 48L, 7.8, 2L, 2L, 48L }); results.put("pig5", new Object[] { "pig5", 1L, 45L, 2.4, 45L, 2.4, 1L, 1L, 45L }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "distinctAggs1Input.txt"); pigServer.shutdown(); }
Example 13
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testGroupElements() throws Exception { // test use of combiner when group elements are accessed in the foreach String input[] = { "ABC\t1\ta\t1", "ABC\t1\tb\t2", "ABC\t1\ta\t3", "ABC\t2\tb\t4", "DEF\t1\td\t1", "XYZ\t1\tx\t2" }; Util.createInputFile(cluster, "testGroupElements.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'testGroupElements.txt' as (str:chararray, num1:int, alph : chararray, num2 : int);"); pigServer.registerQuery("b = group a by (str, num1);"); // check if combiner is present or not for various forms of foreach pigServer.registerQuery("c = foreach b generate flatten(group), COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", true); pigServer.registerQuery("c = foreach b generate group, COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", true); // projecting bag - combiner should not be used pigServer.registerQuery("c = foreach b generate group, a, COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", false); // projecting bag - combiner should not be used pigServer.registerQuery("c = foreach b generate group, a.num2, COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", false); pigServer.registerQuery("c = foreach b generate group.$0, group.$1, COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", true); pigServer.registerQuery("c = foreach b generate group.$0, group.$1 + COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", true); pigServer.registerQuery("c = foreach b generate group.str, group.$1, COUNT(a.alph), SUM(a.num2); "); checkCombinerUsed(pigServer, "c", true); pigServer.registerQuery("c = foreach b generate group.str, group.$1, COUNT(a.alph), SUM(a.num2), " + " (group.num1 == 1 ? 
(COUNT(a.num2) + 1) : (SUM(a.num2) + 10)) ; "); checkCombinerUsed(pigServer, "c", true); List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings( new String[] { "('ABC',1,3L,6L,4L)", "('ABC',2,1L,4L,14L)", "('DEF',1,1L,1L,2L)", "('XYZ',1,1L,2L,2L)", }); Iterator<Tuple> it = pigServer.openIterator("c"); Util.checkQueryOutputsAfterSort(it, expectedRes); Util.deleteFile(cluster, "distinctAggs1Input.txt"); pigServer.shutdown(); }
Example 14
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testDistinctNoCombiner() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // is a distinct() as the leaf String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " z = distinct a;" + " generate group, z, SUM(a.age), SUM(a.gpa);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 }); results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 }); results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { if (i == 1) { // ignore the second field which is a bag // for comparison here continue; } assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "distinctNoCombinerInput.txt"); pigServer.shutdown(); }
Example 15
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testForEachNoCombiner() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // has a foreach in the plan without a distinct agg String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " z = a.age;" + " generate group, z, SUM(a.age), SUM(a.gpa);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 }); results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 }); results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { if (i == 1) { // ignore the second field which is a bag // for comparison here continue; } assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "forEachNoCombinerInput.txt"); pigServer.shutdown(); }
Example 16
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testJiraPig746() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // has a foreach in the plan without a distinct agg String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; String expected[] = { "(pig1,75,{(pig1,18,2.1),(pig1,18,2.1),(pig1,19,2.1),(pig1,20,3.1)})", "(pig2,48,{(pig2,24,3.3),(pig2,24,4.5)})", "(pig5,45,{(pig5,45,2.4)})" }; try { Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b generate group, SUM(a.age), a;"); // make sure there isn't a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); Iterator<Tuple> it = pigServer.openIterator("c"); Util.checkQueryOutputsAfterSortRecursive(it, expected, "group:chararray,age:long,b:{t:(name:chararray,age:int,gpa:double)}"); pigServer.shutdown(); } finally { Util.deleteFile(cluster, "forEachNoCombinerInput.txt"); } }