Java Code Examples for org.apache.pig.PigServer#store()
The following examples show how to use
org.apache.pig.PigServer#store().
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestPigStats.java From spork with Apache License 2.0 | 6 votes |
@Test public void testBytesWritten_JIRA_1027() { File outputFile = null; try { String fileName = this.getClass().getName() + "_" + "testBytesWritten_JIRA_1027"; outputFile = File.createTempFile(fileName, ".out"); String filePath = outputFile.getAbsolutePath(); outputFile.delete(); PigServer pig = new PigServer(ExecType.LOCAL); pig.registerQuery("A = load 'test/org/apache/pig/test/data/passwd';"); ExecJob job = pig.store("A", filePath); PigStats stats = job.getStatistics(); File dataFile = new File( outputFile.getAbsoluteFile() + File.separator + "part-00000" ); assertEquals(dataFile.length(), stats.getBytesWritten()); } catch (IOException e) { LOG.error("Error while generating file", e); fail("Encountered IOException"); } finally { if (outputFile != null) { // Hadoop Local mode creates a directory // Hence we need to delete a directory recursively deleteDirectory(outputFile); } } }
Example 2
Source File: TestPigContext.java From spork with Apache License 2.0 | 5 votes |
private void registerAndStore(PigServer pigServer) throws IOException { // pigServer.debugOn(); List<String> commands = getCommands(); for (final String command : commands) { pigServer.registerQuery(command); } String outFileName = Util.removeColon(input.getAbsolutePath() + ".out"); pigServer.store("counts", outFileName); Util.deleteFile(cluster, outFileName); }
Example 3
Source File: TestPigStats.java From spork with Apache License 2.0 | 5 votes |
@Test public void testPigStatsGetList() { File outputFile = null; try { String filename = this.getClass().getSimpleName() + "_" + "testPigStatsGetList"; outputFile = File.createTempFile(filename, ".out"); String filePath = outputFile.getAbsolutePath(); outputFile.delete(); PigServer pigServer = new PigServer(ExecType.LOCAL); pigServer.registerQuery("a = load 'test/org/apache/pig/test/data/passwd';"); pigServer.registerQuery("b = group a by $0;"); pigServer.registerQuery("c = foreach b generate group, COUNT(a) as cnt;"); pigServer.registerQuery("d = group c by cnt;"); pigServer.registerQuery("e = foreach d generate group;"); ExecJob job = pigServer.store("e", filePath); JobGraph jobGraph = job.getStatistics().getJobGraph(); assertEquals(2, jobGraph.getJobList().size()); } catch (IOException e) { LOG.error("IOException while creating file ", e); fail("Encountered IOException"); } finally { if (outputFile != null) { // delete the directory before returning deleteDirectory(outputFile); } } }
Example 4
Source File: TestIndexedStorage.java From spork with Apache License 2.0 | 5 votes |
private static void createInputFile(PigServer pigServer, String[] inputs, int id, File outputDir) throws IOException { File input = File.createTempFile("tmp", ""); input.delete(); Util.createLocalInputFile(input.getAbsolutePath(), inputs); pigServer.registerQuery("A = load '" + Util.encodeEscape(input.getAbsolutePath()) + "' as (a0:int, a1:int);"); File output = new File(outputDir, "/" + id); pigServer.store("A", output.getAbsolutePath(), "org.apache.pig.piggybank.storage.IndexedStorage('\t','0,1')"); }
Example 5
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 5 votes |
@Test public void testShouldStoreRowInHiveFormat() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); //when server.store("a", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 string,f3 string"); assertEquals(3, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyString.class, o.getClass()); o = struct.getField(2); assertEquals(LazyString.class, o.getClass()); }
Example 6
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 5 votes |
@Test public void testShouldStoreTupleAsHiveArray() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); server.registerQuery("b = FOREACH a GENERATE f1, TOTUPLE(f2,f3);"); //when server.store("b", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>"); assertEquals(2, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyArray.class, o.getClass()); LazyArray arr = (LazyArray)o; List<Object> values = arr.getList(); for(Object value : values) { assertEquals(LazyString.class, value.getClass()); String valueStr =((LazyString) value).getWritableObject().toString(); assertEquals("Sample value", valueStr); } }
Example 7
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 5 votes |
@Test public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); server.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);"); //when server.store("b", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>"); assertEquals(2, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyArray.class, o.getClass()); LazyArray arr = (LazyArray)o; List<Object> values = arr.getList(); for(Object value : values) { assertEquals(LazyString.class, value.getClass()); String valueStr =((LazyString) value).getWritableObject().toString(); assertEquals("Sample value", valueStr); } }
Example 8
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapOnly() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); for(int i = 0; i < MAX; i++) { int t = r.nextInt(100); pw.println(t); if(t > 50) count ++; } pw.close(); PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = filter a by $0 > 50;"); pigServer.registerQuery("c = foreach b generate $0 - 50;"); ExecJob job = pigServer.store("c", "output_map_only"); PigStats pigStats = job.getStatistics(); //counting the no. of bytes in the output file //long filesize = cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath( "output_map_only", pigServer.getPigContext()), pigServer .getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output_map_only"), true); System.out.println("============================================"); System.out.println("Test case Map Only"); System.out.println("============================================"); JobGraph jg = pigStats.getJobGraph(); Iterator<JobStats> iter = jg.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(count, js.getMapOutputRecords()); assertEquals(0, js.getReduceInputRecords()); assertEquals(0, js.getReduceOutputRecords()); System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten()); assertEquals(filesize, js.getHdfsBytesWritten()); } }
Example 9
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapOnlyBinStorage() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); for(int i = 0; i < MAX; i++) { int t = r.nextInt(100); pw.println(t); if(t > 50) count ++; } pw.close(); PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = filter a by $0 > 50;"); pigServer.registerQuery("c = foreach b generate $0 - 50;"); ExecJob job = pigServer.store("c", "output_map_only", "BinStorage"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath( "output_map_only", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output_map_only"), true); System.out.println("============================================"); System.out.println("Test case Map Only"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); Iterator<JobStats> iter = jp.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(count, js.getMapOutputRecords()); assertEquals(0, js.getReduceInputRecords()); assertEquals(0, js.getReduceOutputRecords()); } System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten()); assertEquals(filesize, pigStats.getBytesWritten()); }
Example 10
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapReduceOnly() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); int [] nos = new int[10]; for(int i = 0; i < 10; i++) nos[i] = 0; for(int i = 0; i < MAX; i++) { int index = r.nextInt(10); int value = r.nextInt(100); nos[index] += value; pw.println(index + "\t" + value); } pw.close(); for(int i = 0; i < 10; i++) { if(nos[i] > 0) count ++; } PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = group a by $0;"); pigServer.registerQuery("c = foreach b generate group;"); ExecJob job = pigServer.store("c", "output"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output"), true); System.out.println("============================================"); System.out.println("Test case MapReduce"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); Iterator<JobStats> iter = jp.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(MAX, js.getMapOutputRecords()); System.out.println("Reduce input records : " + js.getReduceInputRecords()); assertEquals(MAX, js.getReduceInputRecords()); System.out.println("Reduce output records : " + js.getReduceOutputRecords()); assertEquals(count, js.getReduceOutputRecords()); } System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten()); assertEquals(filesize, 
pigStats.getBytesWritten()); }
Example 11
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapReduceOnlyBinStorage() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); int [] nos = new int[10]; for(int i = 0; i < 10; i++) nos[i] = 0; for(int i = 0; i < MAX; i++) { int index = r.nextInt(10); int value = r.nextInt(100); nos[index] += value; pw.println(index + "\t" + value); } pw.close(); for(int i = 0; i < 10; i++) { if(nos[i] > 0) count ++; } PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = group a by $0;"); pigServer.registerQuery("c = foreach b generate group;"); ExecJob job = pigServer.store("c", "output", "BinStorage"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output"), true); System.out.println("============================================"); System.out.println("Test case MapReduce"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); Iterator<JobStats> iter = jp.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(MAX, js.getMapOutputRecords()); System.out.println("Reduce input records : " + js.getReduceInputRecords()); assertEquals(MAX, js.getReduceInputRecords()); System.out.println("Reduce output records : " + js.getReduceOutputRecords()); assertEquals(count, js.getReduceOutputRecords()); } System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten()); assertEquals(filesize, 
pigStats.getBytesWritten()); }
Example 12
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapCombineReduce() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); int [] nos = new int[10]; for(int i = 0; i < 10; i++) nos[i] = 0; for(int i = 0; i < MAX; i++) { int index = r.nextInt(10); int value = r.nextInt(100); nos[index] += value; pw.println(index + "\t" + value); } pw.close(); for(int i = 0; i < 10; i++) { if(nos[i] > 0) count ++; } PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = group a by $0;"); pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);"); ExecJob job = pigServer.store("c", "output"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output"), true); System.out.println("============================================"); System.out.println("Test case MapCombineReduce"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); Iterator<JobStats> iter = jp.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(MAX, js.getMapOutputRecords()); System.out.println("Reduce input records : " + js.getReduceInputRecords()); assertEquals(count, js.getReduceInputRecords()); System.out.println("Reduce output records : " + js.getReduceOutputRecords()); assertEquals(count, js.getReduceOutputRecords()); } System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten()); assertEquals(filesize, 
pigStats.getBytesWritten()); }
Example 13
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMapCombineReduceBinStorage() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); int [] nos = new int[10]; for(int i = 0; i < 10; i++) nos[i] = 0; for(int i = 0; i < MAX; i++) { int index = r.nextInt(10); int value = r.nextInt(100); nos[index] += value; pw.println(index + "\t" + value); } pw.close(); for(int i = 0; i < 10; i++) { if(nos[i] > 0) count ++; } PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = group a by $0;"); pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);"); ExecJob job = pigServer.store("c", "output", "BinStorage"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output"), true); System.out.println("============================================"); System.out.println("Test case MapCombineReduce"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); Iterator<JobStats> iter = jp.iterator(); while (iter.hasNext()) { MRJobStats js = (MRJobStats) iter.next(); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(MAX, js.getMapOutputRecords()); System.out.println("Reduce input records : " + js.getReduceInputRecords()); assertEquals(count, js.getReduceInputRecords()); System.out.println("Reduce output records : " + js.getReduceOutputRecords()); assertEquals(count, js.getReduceOutputRecords()); } System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten()); 
assertEquals(filesize, pigStats.getBytesWritten()); }
Example 14
Source File: TestCounters.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMultipleMRJobs() throws IOException, ExecException { int count = 0; PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file)); int [] nos = new int[10]; for(int i = 0; i < 10; i++) nos[i] = 0; for(int i = 0; i < MAX; i++) { int index = r.nextInt(10); int value = r.nextInt(100); nos[index] += value; pw.println(index + "\t" + value); } pw.close(); for(int i = 0; i < 10; i++) { if(nos[i] > 0) count ++; } PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.registerQuery("a = load '" + file + "';"); pigServer.registerQuery("b = order a by $0;"); pigServer.registerQuery("c = group b by $0;"); pigServer.registerQuery("d = foreach c generate group, SUM(b.$1);"); ExecJob job = pigServer.store("d", "output"); PigStats pigStats = job.getStatistics(); InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output", pigServer.getPigContext()), pigServer.getPigContext()); long filesize = 0; while(is.read() != -1) filesize++; is.close(); cluster.getFileSystem().delete(new Path(file), true); cluster.getFileSystem().delete(new Path("output"), true); System.out.println("============================================"); System.out.println("Test case MultipleMRJobs"); System.out.println("============================================"); JobGraph jp = pigStats.getJobGraph(); MRJobStats js = (MRJobStats)jp.getSinks().get(0); System.out.println("Job id: " + js.getName()); System.out.println(jp.toString()); System.out.println("Map input records : " + js.getMapInputRecords()); assertEquals(MAX, js.getMapInputRecords()); System.out.println("Map output records : " + js.getMapOutputRecords()); assertEquals(MAX, js.getMapOutputRecords()); System.out.println("Reduce input records : " + js.getReduceInputRecords()); assertEquals(count, js.getReduceInputRecords()); System.out.println("Reduce output records : " + js.getReduceOutputRecords()); assertEquals(count, js.getReduceOutputRecords()); System.out.println("Hdfs bytes 
written : " + js.getHdfsBytesWritten()); assertEquals(filesize, js.getHdfsBytesWritten()); }
Example 15
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 4 votes |
@Test public void testShouldStoreMapAsHiveMap() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); server.registerQuery("b = FOREACH a GENERATE f1, TOMAP(f2,f3);"); //when server.store("b", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 map<string,string>"); assertEquals(2, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyMap.class, o.getClass()); LazyMap arr = (LazyMap)o; Map<Object,Object> values = arr.getMap(); for(Entry<Object,Object> entry : values.entrySet()) { assertEquals(LazyString.class, entry.getKey().getClass()); assertEquals(LazyString.class, entry.getValue().getClass()); String keyStr =((LazyString) entry.getKey()).getWritableObject().toString(); assertEquals("Sample value", keyStr); String valueStr =((LazyString) entry.getValue()).getWritableObject().toString(); assertEquals("Sample value", valueStr); } }