Java Code Examples for org.apache.pig.PigServer#registerQuery()
The following examples show how to use
org.apache.pig.PigServer#registerQuery().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestCombiner.java From spork with Apache License 2.0 | 6 votes |
/**
 * Writes {@code inputLines} to a temporary input file and registers a Pig
 * {@code load} statement that reads it with {@link PigStorage} using a comma
 * delimiter.
 *
 * <p>When the exec type is local, the lines are printed straight into the local
 * temp file. Otherwise the path is passed through {@code Util.removeColon} and
 * the file is created via {@code Util.createInputFile} on {@code cluster}.
 *
 * @param loadAlias alias the load statement is assigned to
 * @param pig server the load statement is registered with
 * @param inputLines lines of the input file, one record per line
 * @return the input file name that was embedded in the load statement
 * @throws IOException propagated from creating/writing the file or registering the query
 */
private String loadWithTestLoadFunc(String loadAlias, PigServer pig,
        List<String> inputLines) throws IOException {
    File inputFile = File.createTempFile("test", "txt");
    inputFile.deleteOnExit();
    String inputFileName = inputFile.getAbsolutePath();
    if (pig.getPigContext().getExecType().isLocal()) {
        PrintStream ps = new PrintStream(new FileOutputStream(inputFile));
        try {
            for (String line : inputLines) {
                ps.println(line);
            }
        } finally {
            // Release the file handle even if iterating the input fails.
            ps.close();
        }
    } else {
        inputFileName = Util.removeColon(inputFileName);
        Util.createInputFile(cluster, inputFileName, inputLines.toArray(new String[] {}));
    }
    pig.registerQuery(loadAlias + " = load '" + Util.encodeEscape(inputFileName)
            + "' using " + PigStorage.class.getName() + "(',');");
    return inputFileName;
}
Example 2
Source File: TestIn.java From spork with Apache License 2.0 | 6 votes |
/** * Verify that IN operator throws FrontendException when no operand is given. * @throws Exception */ @Test(expected = FrontendException.class) public void testMissingRhsOperand() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = resetData(pigServer); data.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5) ); pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);"); pigServer.registerQuery("B = FILTER A BY i IN ();"); // No operand pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();"); fail("FrontendException must be thrown since no rhs operand is given to IN."); }
Example 3
Source File: TestXMLLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads a temp file through {@code XMLLoader('invalid')} and verifies that the
 * result contains no tuple with at least one field.
 */
public void testShouldReturn0TupleCountIfSearchTagIsNotFound() throws Exception {
    String path = TestHelper.createTempFile(data, "");
    PigServer server = new PigServer(LOCAL);
    // Double each backslash before the path is embedded in the query string.
    path = path.replace("\\", "\\\\");
    server.registerQuery("A = LOAD '" + path
            + "' USING org.apache.pig.piggybank.storage.XMLLoader('invalid') as (doc:chararray);");

    int nonEmptyTuples = 0;
    Iterator<?> rows = server.openIterator("A");
    while (rows.hasNext()) {
        Tuple row = (Tuple) rows.next();
        if (row == null) {
            // A null tuple ends the scan early.
            break;
        }
        if (row.size() > 0) {
            nonEmptyTuples++;
        }
    }

    assertEquals(0, nonEmptyTuples);
}
Example 4
Source File: TestBuiltInBagToTupleOrString.java From spork with Apache License 2.0 | 6 votes |
/**
 * Runs {@code BagToString} from a Pig script, once with the default delimiter
 * and once with {@code '=='}, and checks the stored strings and output schema.
 */
@Test
public void testPigScriptrForBagToStringUDF() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data store = resetData(pig);
    store.set("foo", "myBag:bag{t:(l:chararray)}",
            tuple(bag(tuple("a"), tuple("b"), tuple("c"))));

    pig.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    // Single-argument form: the expected output below is joined with '_'.
    pig.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    pig.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    // Explicit delimiter form.
    pig.registerQuery("C = FOREACH A GENERATE BagToString(myBag, '==') as myBag;");
    pig.registerQuery("STORE C INTO 'baz' USING mock.Storage();");

    List<Tuple> defaultDelimited = store.get("bar");
    assertEquals(schema("myBag:chararray"), store.getSchema("bar"));
    assertEquals(tuple("a_b_c"), defaultDelimited.get(0));

    List<Tuple> customDelimited = store.get("baz");
    assertEquals(tuple("a==b==c"), customDelimited.get(0));
}
Example 5
Source File: TestCollectedGroup.java From spork with Apache License 2.0 | 6 votes |
/**
 * Verifies that a {@code 'collected'} group over two inputs is rejected and that
 * the failure message mentions the single-input restriction.
 */
@Test
public void testMapsideGroupParserNoSupportForMultipleInputs() throws IOException {
    pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (id, name, grade);");

    try {
        pigServer.registerQuery("C = group A by id, B by id using 'collected';");
        pigServer.openIterator("C");
        Assert.fail("Pig doesn't support multi-input collected group.");
    } catch (Exception rejected) {
        String expectedFragment =
                "pig script failed to validate: Collected group is only supported for single input";
        boolean mentionsSingleInput = rejected.getMessage().contains(expectedFragment);
        Assert.assertTrue(mentionsSingleInput);
    }
}
Example 6
Source File: TestPigServer.java From spork with Apache License 2.0 | 6 votes |
@Test // See PIG-3967 public void testGruntValidation() throws IOException { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = resetData(pigServer); data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d")); pigServer.setValidateEachStatement(true); pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);"); pigServer.registerQuery("store A into '" + Util.generateURI(tempDir.toString(), pigServer.getPigContext()) + "/testGruntValidation1';"); pigServer.registerQuery("B = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);"); pigServer.registerQuery("store B into '" + Util.generateURI(tempDir.toString(), pigServer.getPigContext()) + "/testGruntValidation2';"); // This should pass boolean validationExceptionCaptured = false; try { // This should fail due to output validation pigServer.registerQuery("store A into '" + Util.generateURI(tempDir.toString(),pigServer.getPigContext()) + "/testGruntValidation1';"); } catch (FrontendException e) { validationExceptionCaptured = true; } assertTrue(validationExceptionCaptured); }
Example 7
Source File: TestScalarAliases.java From spork with Apache License 2.0 | 6 votes |
/**
 * Uses relation {@code B} (three rows) as a scalar inside a foreach and expects
 * opening the result to fail with a "more than one row" cause.
 * Skipped unless the cluster exec type is reported as MapReduce (see PIG-3994).
 */
@Test
public void testScalarErrMultipleRowsInInput() throws Exception {
    Assume.assumeTrue("Skip this test for TEZ. See PIG-3994",
            Util.isMapredExecType(cluster.getExecType()));
    Util.resetStateForExecModeSwitch();
    pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());

    String[] rows = { "1\t5", "2\t10", "3\t20" };
    String tableName = "table_testScalarAliasesMulRows";
    Util.createInputFile(cluster, tableName, rows);

    pigServer.registerQuery("A = LOAD '" + tableName + "' as (a0: long, a1: double);");
    pigServer.registerQuery("B = LOAD '" + tableName + "' as (b0: long, b1: double);");
    pigServer.registerQuery("C = foreach A generate $0, B.$0;");

    try {
        pigServer.openIterator("C");
        fail("exception expected - scalar input has multiple rows");
    } catch (IOException scalarFailure) {
        String causeMessage = scalarFailure.getCause().getMessage();
        Util.checkStrContainsSubStr(causeMessage,
                "Scalar has more than one row in the output");
    }
}
Example 8
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Stores ten four-column rows with {@code ParquetStorer}, reloads them with
 * {@code ParquetLoader} under different column names (second constructor
 * argument {@code 'true'}), projects two of the columns and checks the values.
 */
@Test
public void testColumnIndexAccessProjection() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setValidateEachStatement(true);
    String parquetDir = "target/out";
    int rowCount = 10;

    Data mock = Storage.resetData(pig);
    List<Tuple> inputRows = new ArrayList<Tuple>();
    for (int n = 0; n < rowCount; n++) {
        inputRows.add(Storage.tuple(n, n * 1.0, n * 2L, "v" + n));
    }
    mock.set("in", "c1:int, c2:double, c3:long, c4:chararray", inputRows);

    // First batch: write the mock rows out as Parquet.
    pig.setBatchOn();
    pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pig.deleteFile(parquetDir);
    pig.registerQuery("Store A into '" + parquetDir + "' using "
            + ParquetStorer.class.getName() + "();");
    pig.executeBatch();

    // Second batch: read back under new names and keep columns n1 and n3.
    pig.registerQuery("B = LOAD '" + parquetDir + "' using "
            + ParquetLoader.class.getName()
            + "('n1:int, n2:double, n3:long, n4:chararray', 'true');");
    pig.registerQuery("C = foreach B generate n1, n3;");
    pig.registerQuery("STORE C into 'out' using mock.Storage();");
    pig.executeBatch();

    List<Tuple> projected = mock.get("out");
    assertEquals(rowCount, projected.size());
    for (int n = 0; n < rowCount; n++) {
        Tuple row = projected.get(n);
        assertEquals(2, row.size());
        assertEquals(n, row.get(0));
        assertEquals(n * 2L, row.get(1));
    }
}
Example 9
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Describes a foreach that emits a plain field followed by a two-element tuple
 * and checks the types of the two resulting schema fields.
 */
@Test
public void testDescribeTuple2Elem() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: int, field3: int );");
    server.registerQuery("b = foreach a generate field1, (field2, field3);");

    Schema described = server.dumpSchema("b");

    // Field 0 is the int column, field 1 the (field2, field3) tuple.
    assertTrue(DataType.INTEGER == described.getField(0).type);
    assertTrue(DataType.TUPLE == described.getField(1).type);
}
Example 10
Source File: TestUnion.java From spork with Apache License 2.0 | 5 votes |
@Test public void testCastingAfterUnionWithMultipleLoadersDifferentCasters2() throws Exception { // A bit more complicated pattern but still same requirement of each // field coming from the same Loader. // 'a' is coming from A(PigStorage) // 'i' is coming from B and C but both from the TextLoader. File f1 = Util.createInputFile("tmp", "i1.txt", new String[] {"b","c", "1", "3"}); File f2 = Util.createInputFile("tmp", "i2.txt", new String[] {"a","b","c"}); File f3 = Util.createInputFile("tmp", "i3.txt", new String[] {"1","2","3"}); PigServer ps = new PigServer(ExecType.LOCAL, new Properties()); ps.registerQuery("A = load '" + Util.encodeEscape(f1.getAbsolutePath()) + "' as (a:bytearray);"); // Using PigStorage() ps.registerQuery("B = load '" + Util.encodeEscape(f2.getAbsolutePath()) + "' using TextLoader() as (i:bytearray);"); ps.registerQuery("C = load '" + Util.encodeEscape(f3.getAbsolutePath()) + "' using TextLoader() as (i:bytearray);"); ps.registerQuery("B2 = join B by i, A by a;"); //{A::a: bytearray,B::i: bytearray} ps.registerQuery("B3 = foreach B2 generate a, B::i as i;"); //{A::a: bytearray,i: bytearray} ps.registerQuery("C2 = join C by i, A by a;"); //{A::a: bytearray,C::i: bytearray} ps.registerQuery("C3 = foreach C2 generate a, C::i as i;"); //{A::a: bytearray,i: bytearray} ps.registerQuery("D = union onschema B3,C3;"); // {A::a: bytearray,i: bytearray} ps.registerQuery("E = foreach D generate (chararray) a, (chararray) i;");//{A::a: chararray,i: chararray} Iterator<Tuple> itr = ps.openIterator("E"); int recordCount = 0; while(itr.next() != null) ++recordCount; assertEquals(4, recordCount); }
Example 11
Source File: TestCompressedFiles.java From spork with Apache License 2.0 | 5 votes |
/**
 * Cogroups the data loaded from {@code gzFile} and {@code datFile} on column 1,
 * flattens {@code DIFF} of the two bags, and expects no output rows.
 */
@Test
public void testCompressed1() throws Throwable {
    pig = new PigServer(cluster.getExecType(), properties);

    String diffQuery = "A = foreach (cogroup (load '"
            + Util.generateURI(gzFile.toString(), pig.getPigContext())
            + "') by $1, (load '"
            + Util.generateURI(datFile.toString(), pig.getPigContext())
            + "') by $1) generate flatten( "
            + DIFF.class.getName()
            + "($1.$1,$2.$1)) ;";
    pig.registerQuery(diffQuery);

    Iterator<Tuple> differences = pig.openIterator("A");
    assertFalse(differences.hasNext());
}
Example 12
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
@Test // PIG-2059 public void test1() throws Throwable { PigServer pig = new PigServer(cluster.getExecType(), properties); pig.setValidateEachStatement(true); pig.registerQuery("A = load 'x' as (u, v);") ; try { pig.registerQuery("B = foreach A generate $2;") ; fail("Query is supposed to fail."); } catch(FrontendException ex) { String msg = "Out of bound access. " + "Trying to access non-existent column: 2"; Util.checkMessageInException(ex, msg); } }
Example 13
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads the year/month/day/hour partitioned directory with
 * {@code HiveColumnarLoader}, filters on {@code year=='2010'}, projects two
 * columns, and expects 240 two-field tuples whose first field is a chararray.
 */
@Test
public void testYearMonthDayHourPartitionedFilesWithProjection() throws IOException {
    int count = 0;
    String loaderSpec =
            "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    pig.registerQuery("a = LOAD '"
            + Util.encodeEscape(yearMonthDayHourPartitionedDir.getAbsolutePath())
            + "' using " + loaderSpec + ";");
    pig.registerQuery("f = FILTER a by year=='2010';");
    pig.registerQuery("b = foreach f generate f1,f2;");

    Iterator<Tuple> rows = pig.openIterator("b");
    // Iteration stops on the first null returned by next().
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(2, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        count++;
    }

    Assert.assertEquals(240, count);
}
Example 14
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema dumped for a {@code limit} matches the schema declared
 * on the load it is applied to.
 */
@Test
public void testDescribeLimit() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = limit a 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected =
            Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}
Example 15
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that describing {@code field1 + 10} yields a schema with a single
 * unnamed integer field.
 */
@Test
public void testDescribeForeach() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = foreach a generate field1 + 10;");

    Schema actual = server.dumpSchema("b");
    // A null alias: the generated expression is compared as an unnamed field.
    Schema.FieldSchema unnamedInt = new Schema.FieldSchema(null, DataType.INTEGER);
    Schema expected = new Schema(unnamedInt);

    assertEquals(expected, actual);
}
Example 16
Source File: TestExampleGenerator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Requests example data for an {@code order by} alias and checks that a result
 * map is returned.
 */
@Test
public void testOrderBy() throws Exception {
    PigServer server = new PigServer(pigContext);
    server.registerQuery("A = load " + A.toString() + " as (x, y);");
    server.registerQuery("B = order A by x;");

    Map<Operator, DataBag> examples = server.getExamples("B");

    assertNotNull(examples);
}
Example 17
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testDistinctNoCombiner() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // is a distinct() as the leaf String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " z = distinct a;" + " generate group, z, SUM(a.age), SUM(a.gpa);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 }); results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 }); results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { if (i == 1) { // ignore the second field which is a bag // for comparison here continue; } assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "distinctNoCombinerInput.txt"); pigServer.shutdown(); }
Example 18
Source File: TestBZip.java From spork with Apache License 2.0 | 4 votes |
/** * Tests the end-to-end writing and reading of an empty BZip file. */ @Test public void testEmptyBzipInPig() throws Exception { PigServer pig = new PigServer(cluster.getExecType(), properties); File in = File.createTempFile("junit", ".tmp"); in.deleteOnExit(); File out = File.createTempFile("junit", ".bz2"); out.delete(); String clusterOutputFilePath = Util.removeColon(out.getAbsolutePath()); FileOutputStream fos = new FileOutputStream(in); fos.write("55\n".getBytes()); fos.close(); System.out.println(in.getAbsolutePath()); pig.registerQuery("AA = load '" + Util.generateURI(in.getAbsolutePath(), pig.getPigContext()) + "';"); pig.registerQuery("A=foreach (group (filter AA by $0 < '0') all) generate flatten($1);"); pig.registerQuery("store A into '" + Util.encodeEscape(clusterOutputFilePath) + "';"); FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration( pig.getPigContext().getProperties())); FileStatus[] outputFiles = fs.listStatus(new Path(clusterOutputFilePath), Util.getSuccessMarkerPathFilter()); FSDataInputStream is = fs.open(outputFiles[0].getPath()); CBZip2InputStream cis = new CBZip2InputStream(is, -1, out.length()); // Just a sanity check, to make sure it was a bzip file; we // will do the value verification later assertEquals(-1, cis.read(new byte[100])); cis.close(); pig.registerQuery("B = load '" + Util.encodeEscape(clusterOutputFilePath) + "';"); pig.openIterator("B"); in.delete(); Util.deleteFile(cluster, clusterOutputFilePath); }
Example 19
Source File: TestPigContext.java From spork with Apache License 2.0 | 4 votes |
/**
 * Checks that UDFs living in packages added through
 * {@code PigContext.initializeImportList} can be referenced by their simple
 * class names, both via {@code PigContext.instantiateFuncFromSpec} and from a
 * Pig script. See PIG-832.
 *
 * <p>Steps, in order: generate two UDF source files, compile them into a temp
 * directory, package them with the {@code jar} command, register that jar via
 * the {@code pig.additional.jars} property, initialize the import list, and
 * then run a load/foreach script that uses {@code TestUDF2} as loader and
 * {@code TestUDF1} as eval function.
 *
 * @throws Exception if compilation, jar creation, file handling or Pig execution fails
 */
@Test
// See PIG-832
public void testImportList() throws Exception {
    String FILE_SEPARATOR = System.getProperty("file.separator");

    // createTempFile is used only to obtain a unique name; the file is replaced by a directory.
    File tmpDir = File.createTempFile("test", "");
    tmpDir.delete();
    tmpDir.mkdir();

    // Directory layout mirrors the packages com.xxx.udf1 and com.xxx.udf2.
    File udf1Dir = new File(tmpDir.getAbsolutePath() + FILE_SEPARATOR + "com" + FILE_SEPARATOR + "xxx" + FILE_SEPARATOR + "udf1");
    udf1Dir.mkdirs();
    File udf2Dir = new File(tmpDir.getAbsolutePath() + FILE_SEPARATOR + "com" + FILE_SEPARATOR + "xxx" + FILE_SEPARATOR + "udf2");
    udf2Dir.mkdirs();

    // TestUDF1: an EvalFunc<Integer> whose exec always returns 1.
    String udf1Src = new String("package com.xxx.udf1;\n" + "import java.io.IOException;\n" + "import org.apache.pig.EvalFunc;\n" + "import org.apache.pig.data.Tuple;\n" + "public class TestUDF1 extends EvalFunc<Integer>{\n" + "public Integer exec(Tuple input) throws IOException {\n" + "return 1;}\n" + "}");
    // TestUDF2: an empty subclass of PigStorage.
    String udf2Src = new String("package com.xxx.udf2;\n" + "import org.apache.pig.builtin.PigStorage;\n" + "public class TestUDF2 extends PigStorage { }\n");

    // compile
    JavaCompilerHelper javaCompilerHelper = new JavaCompilerHelper();
    javaCompilerHelper.compile(tmpDir.getAbsolutePath(), new JavaCompilerHelper.JavaSourceFromString("com.xxx.udf1.TestUDF1", udf1Src), new JavaCompilerHelper.JavaSourceFromString("com.xxx.udf2.TestUDF2", udf2Src));

    // generate jar file
    String jarName = "TestUDFJar.jar";
    String jarFile = tmpDir.getAbsolutePath() + FILE_SEPARATOR + jarName;
    // NOTE(review): the command is assembled by plain concatenation; presumably breaks on paths containing spaces - confirm.
    int status = Util.executeJavaCommand("jar -cf " + jarFile + " -C " + tmpDir.getAbsolutePath() + " " + "com");
    assertEquals(0, status);

    Util.resetStateForExecModeSwitch();
    PigContext localPigContext = new PigContext(cluster.getExecType(), properties);

    // register jar using properties
    localPigContext.getProperties().setProperty("pig.additional.jars", jarFile);
    PigServer pigServer = new PigServer(localPigContext);

    // Two custom packages, colon separated; the second already ends with '.'.
    PigContext.initializeImportList("com.xxx.udf1:com.xxx.udf2.");
    ArrayList<String> importList = PigContext.getPackageImportList();
    // Expected list: the empty package, the two custom packages, then three further entries.
    assertEquals(6, importList.size());
    assertEquals("", importList.get(0));
    assertEquals("com.xxx.udf1.", importList.get(1));
    assertEquals("com.xxx.udf2.", importList.get(2));
    assertEquals("java.lang.", importList.get(3));
    assertEquals("org.apache.pig.builtin.", importList.get(4));
    assertEquals("org.apache.pig.impl.builtin.", importList.get(5));

    // The simple class name must resolve to the class in com.xxx.udf1.
    Object udf = PigContext.instantiateFuncFromSpec("TestUDF1");
    assertTrue(udf.getClass().toString().endsWith("com.xxx.udf1.TestUDF1"));

    int LOOP_COUNT = 40;
    File tmpFile = File.createTempFile("test", "txt");
    tmpFile.delete(); // don't actually want the file, just the filename
    String clusterTmpPath = Util.removeColon(tmpFile.getCanonicalPath());

    // Build LOOP_COUNT input lines from a fixed-seed Random, so the data is reproducible.
    String localInput[] = new String[LOOP_COUNT];
    Random r = new Random(1);
    int rand;
    for(int i = 0; i < LOOP_COUNT; i++) {
        rand = r.nextInt(100);
        localInput[i] = Integer.toString(rand);
    }
    Util.createInputFile(cluster, clusterTmpPath, localInput);

    FileLocalizer.deleteTempFiles();
    // Both UDFs are referenced by simple name only.
    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterTmpPath) + "' using TestUDF2() AS (num:chararray);");
    pigServer.registerQuery("B = foreach A generate TestUDF1(num);");

    Iterator<Tuple> iter = pigServer.openIterator("B");
    assertTrue("No Output received", iter.hasNext());
    // Every output tuple must carry the constant Integer 1 produced by TestUDF1.
    while (iter.hasNext()) {
        Tuple t = iter.next();
        assertTrue(t.get(0) instanceof Integer);
        assertEquals(Integer.valueOf(1), (Integer)t.get(0));
    }

    Util.deleteFile(cluster, clusterTmpPath);
    Util.deleteDirectory(tmpDir);
}
Example 20
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
/**
 * Writes {@code MAX} random (key, value) lines, runs a group-by on the key in
 * MapReduce mode, and compares the record counters of each job and the bytes
 * written against values computed locally.
 */
@Test
public void testMapReduceOnly() throws IOException, ExecException {
    // Per-key running totals; Java zero-initializes the array.
    int[] totals = new int[10];

    PrintWriter writer = new PrintWriter(Util.createInputFile(cluster, file));
    for (int line = 0; line < MAX; line++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        totals[index] += value;
        writer.println(index + "\t" + value);
    }
    writer.close();

    // Number of keys with a positive total; used as the expected reduce output count.
    int count = 0;
    for (int total : totals) {
        if (total > 0) {
            count++;
        }
    }

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pig.registerQuery("a = load '" + file + "';");
    pig.registerQuery("b = group a by $0;");
    pig.registerQuery("c = foreach b generate group;");
    ExecJob job = pig.store("c", "output");
    PigStats stats = job.getStatistics();

    // Measure the stored output by reading it byte by byte.
    InputStream stored = FileLocalizer.open(
            FileLocalizer.fullPath("output", pig.getPigContext()),
            pig.getPigContext());
    long filesize = 0;
    while (stored.read() != -1) {
        filesize++;
    }
    stored.close();

    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);

    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");

    JobGraph graph = stats.getJobGraph();
    for (Iterator<JobStats> jobs = graph.iterator(); jobs.hasNext();) {
        MRJobStats mrStats = (MRJobStats) jobs.next();

        System.out.println("Map input records : " + mrStats.getMapInputRecords());
        assertEquals(MAX, mrStats.getMapInputRecords());

        System.out.println("Map output records : " + mrStats.getMapOutputRecords());
        assertEquals(MAX, mrStats.getMapOutputRecords());

        System.out.println("Reduce input records : " + mrStats.getReduceInputRecords());
        assertEquals(MAX, mrStats.getReduceInputRecords());

        System.out.println("Reduce output records : " + mrStats.getReduceOutputRecords());
        assertEquals(count, mrStats.getReduceOutputRecords());
    }

    System.out.println("Hdfs bytes written : " + stats.getBytesWritten());
    assertEquals(filesize, stats.getBytesWritten());
}