Java Code Examples for org.apache.pig.PigServer#setBatchOn()
The following examples show how to use
org.apache.pig.PigServer#setBatchOn().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMultiStorageCompression.java From spork with Apache License 2.0 | 6 votes |
private void runQuery(String outputPath, String compressionType) throws Exception, ExecException, IOException, FrontendException { // create a data file String filename = TestHelper.createTempFile(data, ""); PigServer pig = new PigServer(LOCAL); filename = filename.replace("\\", "\\\\"); patternString = patternString.replace("\\", "\\\\"); String query = "A = LOAD '" + Util.encodeEscape(filename) + "' USING PigStorage(',') as (a,b,c);"; String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath) + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('" + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');"; // Run Pig pig.setBatchOn(); pig.registerQuery(query); pig.registerQuery(query2); pig.executeBatch(); }
Example 2
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Counts the rows produced by HiveColumnarLoader when it is given the
 * startingDate:endingDate range and compares the total with
 * datePartitionedRowCount.
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    String loadStatement = "a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath())
            + "' using " + funcSpecString + ";";
    server.registerQuery(loadStatement);

    // the loop ends as soon as next() hands back null
    Iterator<Tuple> rows = server.openIterator("a");
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
Example 3
Source File: BoundScript.java From spork with Apache License 2.0 | 6 votes |
/**
 * Runs the given script text through a non-interactive GruntParser with
 * batch mode switched on, then executes the batch.
 *
 * @param query script text to run
 * @return whatever PigStats.get() yields once the batch has been executed
 * @throws IOException if parsing fails (the ParseException is kept as the cause)
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);

    // grab the currently registered listeners before a new ScriptState is started
    List<PigProgressNotificationListener> listeners = ScriptState.get().getAllListeners();

    PigContext pigContext = scriptContext.getPigContext();
    ScriptState freshState = pigContext.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);

    // re-register the captured listeners on the state returned by ScriptState.get()
    for (PigProgressNotificationListener listener : listeners) {
        ScriptState.get().registerListener(listener);
    }

    PigServer server = new PigServer(scriptContext.getPigContext(), false);
    server.setBatchOn();

    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException e) {
        throw new IOException("Failed to parse script " + e.getMessage(), e);
    }

    server.executeBatch();
    return PigStats.get();
}
Example 4
Source File: TestAssert.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks the ASSERT operator on data where every row satisfies the condition:
 * all three input tuples must arrive in 'bar', in order.
 *
 * @throws Exception
 */
@Test
public void testPositive() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);

    data.set("foo", tuple(1), tuple(2), tuple(3));

    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pigServer.registerQuery("ASSERT A BY i > 0;");
    pigServer.registerQuery("STORE A INTO 'bar' USING mock.Storage();");
    pigServer.executeBatch();

    List<Tuple> stored = data.get("bar");
    assertEquals(3, stored.size());
    // position k is expected to hold the tuple (k + 1)
    for (int k = 0; k < 3; k++) {
        assertEquals(tuple(k + 1), stored.get(k));
    }
}
Example 5
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 5 votes |
@Test public void testShouldStoreTupleAsHiveArray() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); server.registerQuery("b = FOREACH a GENERATE f1, TOTUPLE(f2,f3);"); //when server.store("b", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>"); assertEquals(2, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyArray.class, o.getClass()); LazyArray arr = (LazyArray)o; List<Object> values = arr.getList(); for(Object value : values) { assertEquals(LazyString.class, value.getClass()); String valueStr =((LazyString) value).getWritableObject().toString(); assertEquals("Sample value", valueStr); } }
Example 6
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes ten four-column rows with ParquetStorer, loads them back through
 * ParquetLoader with the schema 'n1:int, n2:double, n3:long, n4:chararray'
 * and a second argument of 'true', projects n1 and n3, and verifies the
 * projected values row by row.
 */
@Test
public void testColumnIndexAccessProjection() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setValidateEachStatement(true);

    String out = "target/out";
    int rows = 10;

    Data data = Storage.resetData(pigServer);
    List<Tuple> input = new ArrayList<Tuple>();
    for (int r = 0; r < rows; r++) {
        input.add(Storage.tuple(r, r*1.0, r*2L, "v"+r));
    }
    data.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

    // first batch: write the mock input out as parquet
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.deleteFile(out);
    pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
    pigServer.executeBatch();

    // second batch: read it back under different column names and project two of them
    pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName()
            + "('n1:int, n2:double, n3:long, n4:chararray', 'true');");
    pigServer.registerQuery("C = foreach B generate n1, n3;");
    pigServer.registerQuery("STORE C into 'out' using mock.Storage();");
    pigServer.executeBatch();

    List<Tuple> actualList = data.get("out");
    assertEquals(rows, actualList.size());
    for (int r = 0; r < rows; r++) {
        Tuple projected = actualList.get(r);
        assertEquals(2, projected.size());
        assertEquals(r, projected.get(0));
        assertEquals(r*2L, projected.get(1));
    }
}
Example 7
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads the year/month/day/hour partitioned directory, keeps the rows whose
 * year is '2010', projects six columns, orders them, and expects 240 tuples
 * of size 6.
 */
@Test
public void testYearMonthDayHourPartitionedFilesWithProjectionAndPartitionColumns() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(yearMonthDayHourPartitionedDir.getAbsolutePath())
            + "' using " + funcSpecString + ";");
    server.registerQuery("f = FILTER a by year=='2010';");
    server.registerQuery("r = foreach f generate year, f2, f3, month, day, hour;");
    server.registerQuery("b = ORDER r BY year, month, day, hour;");

    // the loop ends as soon as next() hands back null
    Iterator<Tuple> rows = server.openIterator("b");
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        System.out.println("Tuple: " + row);
        assertEquals(6, row.size());
        count++;
    }

    System.out.println("Count: " + count);
    Assert.assertEquals(240, count);
}
Example 8
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testReqestedSchemaColumnPruning() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); pigServer.setValidateEachStatement(true); String out = "target/out"; int rows = 10; Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < rows; i++) { list.add(Storage.tuple(i, "a"+i, i*2)); } data.set("in", "i:int, a:chararray, b:int", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();"); pigServer.executeBatch(); //Test Null Padding at the end pigServer.registerQuery("C = LOAD '" + out + "' using " + ParquetLoader.class.getName()+"('i:int, a:chararray, b:int, n1:int, n2:chararray');"); pigServer.registerQuery("G = foreach C generate n1,b,n2,i;"); pigServer.registerQuery("STORE G into 'out' using mock.Storage();"); pigServer.executeBatch(); List<Tuple> actualList = data.get("out"); assertEquals(rows, actualList.size()); for(Tuple t : actualList) { assertEquals(4, t.size()); assertTrue(t.isNull(0)); assertTrue(t.isNull(2)); } }
Example 9
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads every file under simpleDataDir, projects f1, and checks that each
 * tuple has one CHARARRAY field and that the row total equals
 * simpleDirFileCount * simpleRowCount.
 */
@Test
public void testReadingMultipleNonPartitionedFiles() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String singlePartitionedDir = simpleDataDir.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedDir)
            + "' using " + funcSpecString + ";");
    server.registerQuery("b = foreach a generate f1;");

    Iterator<Tuple> rows = server.openIterator("b");

    // the loop ends as soon as next() hands back null
    int count = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        count++;
    }

    Assert.assertEquals(simpleDirFileCount * simpleRowCount, count);
}
Example 10
Source File: VespaQueryTest.java From vespa with Apache License 2.0 | 5 votes |
/**
 * Creates a local PigServer in batch mode and registers the given script
 * with the endpoint supplied as the "ENDPOINT" parameter.
 *
 * @param script   script handed to PigServer.registerScript
 * @param endpoint value bound to the "ENDPOINT" script parameter
 * @return the PigServer with the script registered; the batch is not executed here
 */
private PigServer setup(String script, String endpoint) throws Exception {
    Configuration conf = new HdfsConfiguration();

    Map<String, String> scriptParams = new HashMap<>();
    scriptParams.put("ENDPOINT", endpoint);

    PigServer server = new PigServer(ExecType.LOCAL, conf);
    server.setBatchOn();
    server.registerScript(script, scriptParams);
    return server;
}
Example 11
Source File: TestLocationInPhysicalPlan.java From spork with Apache License 2.0 | 5 votes |
/**
 * Runs a four-line LOAD / GROUP / FOREACH / STORE script as one batch and
 * checks the original-location information reported on the store operator
 * and the alias-location string reported on the job statistics.
 */
@Test
public void test() throws Exception {
    File input = File.createTempFile("test", "input");
    input.deleteOnExit();

    // only the name is needed: the temp file is removed right after creation
    File output = File.createTempFile("test", "output");
    output.delete();

    String[] inputLines = new String[] {
        "1,2,3",
        "1,1,3",
        "1,1,1",
        "3,1,1",
        "1,2,1",
    };
    Util.createLocalInputFile(input.getAbsolutePath(), inputLines);

    PigServer pigServer = new PigServer(ExecType.LOCAL);
    pigServer.setBatchOn();
    // the script is registered as a single multi-line string; the line
    // numbers asserted below refer to lines of this string
    pigServer.registerQuery(
            "A = LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' using PigStorage();\n"
            + "B = GROUP A BY $0;\n"
            + "A = FOREACH B GENERATE COUNT(A);\n"
            + "STORE A INTO '" + Util.encodeEscape(output.getAbsolutePath()) + "';");

    ExecJob job = pigServer.executeBatch().get(0);

    List<OriginalLocation> originalLocations = job.getPOStore().getOriginalLocations();
    Assert.assertEquals(1, originalLocations.size());

    OriginalLocation storeLocation = originalLocations.get(0);
    Assert.assertEquals(4, storeLocation.getLine());
    Assert.assertEquals(0, storeLocation.getOffset());
    Assert.assertEquals("A", storeLocation.getAlias());

    JobStats jStats = (JobStats)job.getStatistics().getJobGraph().getSinks().get(0);
    Assert.assertEquals("M: A[1,4],A[3,4],B[2,4] C: A[3,4],B[2,4] R: A[3,4]", jStats.getAliasLocation());
}
Example 12
Source File: TestMockStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Stores two relations into the same 'output' location within one batch and
 * expects at least one of the resulting jobs to report FAILED.
 */
@Test
public void testBadUsage2() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);

    data.set("input", tuple("a"), tuple("b"), tuple("c"));

    pigServer.setBatchOn();
    pigServer.registerQuery(
            "A = LOAD 'input' USING mock.Storage();"
            +"B = LOAD 'input' USING mock.Storage();"
            +"STORE A INTO 'output' USING mock.Storage();"
            +"STORE B INTO 'output' USING mock.Storage();");
    List<ExecJob> results = pigServer.executeBatch();

    // stop scanning at the first failed job
    boolean failed = false;
    for (ExecJob execJob : results) {
        if (execJob.getStatus() != JOB_STATUS.FAILED) {
            continue;
        }
        failed = true;
        break;
    }

    assertTrue("job should have failed for storing twice in the same location", failed);
}
Example 13
Source File: TestGrunt.java From spork with Apache License 2.0 | 5 votes |
/**
 * Registers a script that loads a file, stores it, and then removes the
 * input file with a shell command, and checks that the first job of the
 * batch completes.
 *
 * <p>The writers and the script input stream are closed in {@code finally}
 * blocks so that no file handle is leaked when a write, the script
 * registration, or the assertion fails.
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    // pick the platform's delete command for the "sh" line of the script
    String strRemove = "rm";
    if (Util.WINDOWS) {
        strRemove = "del";
    }

    File inputFile = File.createTempFile("testInputFile", ".txt");
    PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile));
    try {
        pwInput.println("1");
    } finally {
        pwInput.close();
    }

    File inputScript = File.createTempFile("testInputScript", "");
    // only the name is needed: the temp file is removed right after creation
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    outputFile.delete();

    PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript));
    try {
        pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
        pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
        pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    } finally {
        pwScript.close();
    }

    InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile());
    try {
        server.setBatchOn();
        server.registerScript(inputStream);
        // the stream stays open until the batch has run, in case it is read lazily
        List<ExecJob> execJobs = server.executeBatch();
        assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
    } finally {
        inputStream.close();
    }
}
Example 14
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Restricts HiveColumnarLoader to a range that starts and ends on
 * startingDate, projects f2, and expects 50 single-field CHARARRAY tuples.
 */
@Test
public void test1DayDatePartitionedFilesWithProjection() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + startingDate + "')";
    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath())
            + "' using " + funcSpecString + ";");
    server.registerQuery("b = FOREACH a GENERATE f2 as p;");

    // the loop ends as soon as next() hands back null
    Iterator<Tuple> rows = server.openIterator("b");
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        count++;
    }

    Assert.assertEquals(50, count);
}
Example 15
Source File: TestUnionOnSchema.java From spork with Apache License 2.0 | 5 votes |
@Test public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException { PigServer pig = new PigServer(ExecType.LOCAL); Data data = Storage.resetData(pig); // Use batch to force multiple outputs from relation l3. This causes // ImplicitSplitInsertVisitor to call SchemaResetter. pig.setBatchOn(); String query = " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);" + "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);" + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);" + "u = union onschema l1, l2, l3;" + "store u into 'out1' using mock.Storage;" + "store l3 into 'out2' using mock.Storage;" ; Util.registerMultiLineQuery(pig, query); pig.executeBatch(); List<Tuple> list1 = data.get("out1"); List<Tuple> list2 = data.get("out2"); List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings( new String[] { "(1,2,null,null)", "(5,3,null,null)", "(1,2,3,null)", "(4,5,6,null)", }); Util.checkQueryOutputsAfterSort(list1, expectedRes); assertEquals(0, list2.size()); }
Example 16
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads simpleDataFile, projects f1, and checks that each tuple has one
 * CHARARRAY field and that the row total equals simpleRowCount.
 */
@Test
public void testReadingSingleFile() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile)
            + "' using " + funcSpecString + ";");
    server.registerQuery("b = foreach a generate f1;");

    Iterator<Tuple> rows = server.openIterator("b");

    // the loop ends as soon as next() hands back null
    int count = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
Example 17
Source File: TestBZip.java From spork with Apache License 2.0 | 5 votes |
/**
 * With output compression enabled and the BZip2 codec configured, stores the
 * same relation to 'output2.bz2' and 'output2' in one batch and checks that
 * the first listed file of each location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String[] inputData = new String[] { "1\t2\r3\t4" };
    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);

    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext.getProperties().setProperty("output.compression.codec",
            "org.apache.hadoop.io.compress.BZip2Codec");

    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    FileStatus[] outputFiles = fs.listStatus(new Path("output2"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);

    outputFiles = fs.listStatus(new Path("output2.bz2"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
}
Example 18
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 4 votes |
@Test public void testTypePersuasion() throws Exception { Properties p = new Properties(); p.setProperty(STRICT_TYPE_CHECKING, Boolean.FALSE.toString()); PigServer pigServer = new PigServer(ExecType.LOCAL, p); pigServer.setValidateEachStatement(true); String out = "target/out"; int rows = 10; Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < rows; i++) { list.add(Storage.tuple(i, (long)i, (float)i, (double)i, Integer.toString(i), Boolean.TRUE)); } data.set("in", "i:int, l:long, f:float, d:double, s:chararray, b:boolean", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '"+out+"' using " + ParquetStorer.class.getName()+"();"); pigServer.executeBatch(); List<Tuple> actualList = null; byte [] types = { INTEGER, LONG, FLOAT, DOUBLE, CHARARRAY, BOOLEAN }; //Test extracting values using each type. for(int i=0; i<types.length; i++) { String query = "B = LOAD '" + out + "' using " + ParquetLoader.class.getName()+ "('i:" + DataType.findTypeName(types[i%types.length])+"," + " l:" + DataType.findTypeName(types[(i+1)%types.length]) +"," + " f:" + DataType.findTypeName(types[(i+2)%types.length]) +"," + " d:" + DataType.findTypeName(types[(i+3)%types.length]) +"," + " s:" + DataType.findTypeName(types[(i+4)%types.length]) +"," + " b:" + DataType.findTypeName(types[(i+5)%types.length]) +"');"; System.out.println("Query: " + query); pigServer.registerQuery(query); pigServer.registerQuery("STORE B into 'out"+i+"' using mock.Storage();"); pigServer.executeBatch(); actualList = data.get("out" + i); assertEquals(rows, actualList.size()); for(Tuple t : actualList) { assertTrue(t.getType(0) == types[i%types.length]); assertTrue(t.getType(1) == types[(i+1)%types.length]); assertTrue(t.getType(2) == types[(i+2)%types.length]); assertTrue(t.getType(3) == types[(i+3)%types.length]); assertTrue(t.getType(4) == 
types[(i+4)%types.length]); assertTrue(t.getType(5) == types[(i+5)%types.length]); } } }
Example 19
Source File: TestParser.java From spork with Apache License 2.0 | 4 votes |
/**
 * Registers a sequence of STORE statements targeting different locations and,
 * after each one, converts the Pig properties to a Configuration and checks
 * the value found under MRConfiguration.JOB_HDFS_SERVERS.
 */
@Test
public void testRemoteServerList2() throws ExecException, IOException {
    pigServer = new PigServer(LOCAL);
    Properties pigProperties = pigServer.getPigContext().getProperties();
    pigProperties.setProperty("fs.default.name", "hdfs://a.com:8020");
    Configuration conf;

    pigServer.setBatchOn();

    Data data = Storage.resetData(pigServer.getPigContext());
    data.set("/user/pig/1.txt");// no data

    pigServer.registerQuery("a = load '/user/pig/1.txt' using mock.Storage;");

    // store to a path without a scheme: the value may be absent or equal to the default file system
    pigServer.registerQuery("store a into '/user/pig/1.txt';");
    System.out.println("hdfs-servers: " + pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
    conf = ConfigurationUtil.toConfiguration(pigProperties);
    assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) == null
            || conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.default.name"))
            || conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.defaultFS")));

    // store to an hdfs:// location on another host
    pigServer.registerQuery("store a into 'hdfs://b.com/user/pig/1.txt' using mock.Storage;");
    System.out.println("hdfs-servers: " + pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
    conf = ConfigurationUtil.toConfiguration(pigProperties);
    assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null
            && conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://b.com"));

    // store to a har:// location; the value is expected to contain the matching hdfs:// address
    pigServer.registerQuery("store a into 'har://hdfs-c.com:8020/user/pig/1.txt' using mock.Storage;");
    System.out.println("hdfs-servers: " + pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
    conf = ConfigurationUtil.toConfiguration(pigProperties);
    assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null
            && conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://c.com:8020"));

    // store to an hdfs:// location with an explicit port
    pigServer.registerQuery("store a into 'hdfs://d.com:8020/user/pig/1.txt' using mock.Storage;");
    System.out.println("hdfs-servers: " + pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
    conf = ConfigurationUtil.toConfiguration(pigProperties);
    assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null
            && conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://d.com:8020"));
}
Example 20
Source File: TestMRJobStats.java From spork with Apache License 2.0 | 4 votes |
/**
 * Runs two map-only jobs on a mini cluster: the first with
 * PigConfiguration.PIG_NO_TASK_REPORT set to "false", where the min/max/avg
 * map times must not be -1, and the second with it set to "true", where
 * all three must be -1.
 *
 * <p>The cluster is shut down in a {@code finally} block so that a failing
 * assertion or job does not leave it running.
 */
@Test
public void testNoTaskReportProperty() throws IOException {
    MiniGenericCluster cluster = MiniGenericCluster.buildCluster(MiniGenericCluster.EXECTYPE_MR);
    try {
        Properties properties = cluster.getProperties();

        String inputFile = "input";
        PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, inputFile));
        try {
            pw.println("100\tapple");
            pw.println("200\torange");
        } finally {
            pw.close();
        }

        // Enable task reports in job statistics
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "false");
        PigServer pigServer = new PigServer(cluster.getExecType(), properties);
        pigServer.setBatchOn();

        // Launch a map-only job
        pigServer.registerQuery("A = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store A into 'task_reports';");
        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats pigStats = jobs.get(0).getStatistics();
        MRJobStats jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats includes TaskReports information
        long minMapTime = jobStats.getMinMapTime();
        long maxMapTime = jobStats.getMaxMapTime();
        long avgMapTime = jobStats.getAvgMapTime();
        assertTrue("TaskReports are enabled, so minMapTime shouldn't be -1", minMapTime != -1l);
        assertTrue("TaskReports are enabled, so maxMapTime shouldn't be -1", maxMapTime != -1l);
        assertTrue("TaskReports are enabled, so avgMapTime shouldn't be -1", avgMapTime != -1l);

        // Disable task reports in job statistics
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "true");

        // Launch another map-only job
        pigServer.registerQuery("B = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store B into 'no_task_reports';");
        jobs = pigServer.executeBatch();
        pigStats = jobs.get(0).getStatistics();
        jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats doesn't include any TaskReports information
        minMapTime = jobStats.getMinMapTime();
        maxMapTime = jobStats.getMaxMapTime();
        avgMapTime = jobStats.getAvgMapTime();
        assertEquals("TaskReports are disabled, so minMapTime should be -1", -1l, minMapTime);
        assertEquals("TaskReports are disabled, so maxMapTime should be -1", -1l, maxMapTime);
        assertEquals("TaskReports are disabled, so avgMapTime should be -1", -1l, avgMapTime);
    } finally {
        // always release the mini cluster, even when the checks above fail
        cluster.shutDown();
    }
}