Java Code Examples for org.apache.pig.PigServer#deleteFile()
The following examples show how to use
org.apache.pig.PigServer#deleteFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PerfTest.java From parquet-mr with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { StringBuilder schemaString = new StringBuilder("a0: chararray"); for (int i = 1; i < COLUMN_COUNT; i++) { schemaString.append(", a" + i + ": chararray"); } String out = "target/PerfTest"; { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = Storage.resetData(pigServer); Collection<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < ROW_COUNT; i++) { Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT); for (int j = 0; j < COLUMN_COUNT; j++) { tuple.set(j, "a" + i + "_" + j); } list.add(tuple); } data.set("in", schemaString.toString(), list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '"+out+"' using "+ParquetStorer.class.getName()+"();"); if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", pigServer.executeBatch().get(0).getException()); } } load(out, 1); load(out, 2); load(out, 3); load(out, 4); load(out, 5); load(out, 10); load(out, 20); load(out, 50); System.out.println(results); }
Example 2
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testReqestedSchemaColumnPruning() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); pigServer.setValidateEachStatement(true); String out = "target/out"; int rows = 10; Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < rows; i++) { list.add(Storage.tuple(i, "a"+i, i*2)); } data.set("in", "i:int, a:chararray, b:int", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();"); pigServer.executeBatch(); //Test Null Padding at the end pigServer.registerQuery("C = LOAD '" + out + "' using " + ParquetLoader.class.getName()+"('i:int, a:chararray, b:int, n1:int, n2:chararray');"); pigServer.registerQuery("G = foreach C generate n1,b,n2,i;"); pigServer.registerQuery("STORE G into 'out' using mock.Storage();"); pigServer.executeBatch(); List<Tuple> actualList = data.get("out"); assertEquals(rows, actualList.size()); for(Tuple t : actualList) { assertEquals(4, t.size()); assertTrue(t.isNull(0)); assertTrue(t.isNull(2)); } }
Example 3
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testColumnIndexAccess() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); pigServer.setValidateEachStatement(true); String out = "target/out"; int rows = 10; Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < rows; i++) { list.add(Storage.tuple(i, i*1.0, i*2L, "v"+i)); } data.set("in", "c1:int, c2:double, c3:long, c4:chararray", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();"); pigServer.executeBatch(); //Test Null Padding at the end pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('n1:int, n2:double, n3:long, n4:chararray', 'true');"); pigServer.registerQuery("STORE B into 'out' using mock.Storage();"); pigServer.executeBatch(); List<Tuple> actualList = data.get("out"); assertEquals(rows, actualList.size()); for(int i = 0; i < rows; i++) { Tuple t = actualList.get(i); assertEquals(4, t.size()); assertEquals(i, t.get(0)); assertEquals(i * 1.0, t.get(1)); assertEquals(i*2L, t.get(2)); assertEquals("v"+i, t.get(3)); } }
Example 4
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Stores ten (int, double, long, chararray) rows as Parquet under names c1..c4, reloads them
 * with renamed columns n1..n4 and a second loader argument 'true', projects n1 and n3, and
 * checks that each result tuple holds the original first and third values.
 */
@Test
public void testColumnIndexAccessProjection() throws Exception {
  final int rowCount = 10;
  final String parquetPath = "target/out";

  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  List<Tuple> input = new ArrayList<Tuple>();
  for (int n = 0; n < rowCount; n++) {
    input.add(Storage.tuple(n, n * 1.0, n * 2L, "v" + n));
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // Write the mock input out as Parquet, clearing any previous output first.
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetPath);
  String storerName = ParquetStorer.class.getName();
  pig.registerQuery("Store A into '" + parquetPath + "' using " + storerName + "();");
  pig.executeBatch();

  // Reload under different column names, then keep only the first and third columns.
  String loaderName = ParquetLoader.class.getName();
  pig.registerQuery("B = LOAD '" + parquetPath + "' using " + loaderName
      + "('n1:int, n2:double, n3:long, n4:chararray', 'true');");
  pig.registerQuery("C = foreach B generate n1, n3;");
  pig.registerQuery("STORE C into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> stored = mockData.get("out");
  assertEquals(rowCount, stored.size());

  int n = 0;
  while (n < rowCount) {
    Tuple actual = stored.get(n);
    assertEquals(2, actual.size());
    assertEquals(n, actual.get(0));
    assertEquals(n * 2L, actual.get(1));
    n++;
  }
}
Example 5
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * With ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN set to true, stores ten rows as Parquet,
 * reloads them filtered by {@code c1 == 1 or c1 == 5}, and asserts that the first job's first
 * input statistic reports exactly two records.
 */
@Test
public void testPredicatePushdown() throws Exception {
  final int rowCount = 10;
  final String parquetPath = "target/out";
  final String filteredPath = "target/out2";

  Configuration pushdownConf = new Configuration();
  pushdownConf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);

  PigServer pig = new PigServer(ExecType.LOCAL, pushdownConf);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  List<Tuple> input = new ArrayList<Tuple>();
  for (int n = 0; n < rowCount; n++) {
    input.add(Storage.tuple(n, n * 1.0, n * 2L, "v" + n));
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // Write the mock input out as Parquet, clearing any previous output first.
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetPath);
  String storerName = ParquetStorer.class.getName();
  pig.registerQuery("Store A into '" + parquetPath + "' using " + storerName + "();");
  pig.executeBatch();

  // Reload with a filter that matches two of the ten rows.
  pig.deleteFile(filteredPath);
  String loaderName = ParquetLoader.class.getName();
  pig.registerQuery("B = LOAD '" + parquetPath + "' using " + loaderName
      + "('c1:int, c2:double, c3:long, c4:chararray');");
  pig.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
  pig.registerQuery("STORE C into '" + filteredPath + "' using mock.Storage();");

  List<ExecJob> finishedJobs = pig.executeBatch();
  ExecJob filterJob = finishedJobs.get(0);
  long recordsRead = filterJob.getStatistics().getInputStats().get(0).getNumberRecords();

  assertEquals(2, recordsRead);
}
Example 6
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Verifies that {@code PigServer.deleteFile} throws a FrontendException when "rm" is listed
 * in the PIG_BLACKLIST property.
 */
@Test(expected = FrontendException.class)
public void testBlacklistRemoveWithPigServer() throws Exception {
  // Blacklist the "rm" command on the shared context before building the server.
  ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "rm");

  new PigServer(ctx).deleteFile("foo");
}
Example 7
Source File: TestParquetLoader.java From parquet-mr with Apache License 2.0 | 4 votes |
@Test public void testTypePersuasion() throws Exception { Properties p = new Properties(); p.setProperty(STRICT_TYPE_CHECKING, Boolean.FALSE.toString()); PigServer pigServer = new PigServer(ExecType.LOCAL, p); pigServer.setValidateEachStatement(true); String out = "target/out"; int rows = 10; Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < rows; i++) { list.add(Storage.tuple(i, (long)i, (float)i, (double)i, Integer.toString(i), Boolean.TRUE)); } data.set("in", "i:int, l:long, f:float, d:double, s:chararray, b:boolean", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '"+out+"' using " + ParquetStorer.class.getName()+"();"); pigServer.executeBatch(); List<Tuple> actualList = null; byte [] types = { INTEGER, LONG, FLOAT, DOUBLE, CHARARRAY, BOOLEAN }; //Test extracting values using each type. for(int i=0; i<types.length; i++) { String query = "B = LOAD '" + out + "' using " + ParquetLoader.class.getName()+ "('i:" + DataType.findTypeName(types[i%types.length])+"," + " l:" + DataType.findTypeName(types[(i+1)%types.length]) +"," + " f:" + DataType.findTypeName(types[(i+2)%types.length]) +"," + " d:" + DataType.findTypeName(types[(i+3)%types.length]) +"," + " s:" + DataType.findTypeName(types[(i+4)%types.length]) +"," + " b:" + DataType.findTypeName(types[(i+5)%types.length]) +"');"; System.out.println("Query: " + query); pigServer.registerQuery(query); pigServer.registerQuery("STORE B into 'out"+i+"' using mock.Storage();"); pigServer.executeBatch(); actualList = data.get("out" + i); assertEquals(rows, actualList.size()); for(Tuple t : actualList) { assertTrue(t.getType(0) == types[i%types.length]); assertTrue(t.getType(1) == types[(i+1)%types.length]); assertTrue(t.getType(2) == types[(i+2)%types.length]); assertTrue(t.getType(3) == types[(i+3)%types.length]); assertTrue(t.getType(4) == 
types[(i+4)%types.length]); assertTrue(t.getType(5) == types[(i+5)%types.length]); } } }
Example 8
Source File: TestParquetStorer.java From parquet-mr with Apache License 2.0 | 4 votes |
@Test public void testComplexSchema() throws ExecException, Exception { String out = "target/out"; PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = Storage.resetData(pigServer); Collection<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < 1000; i++) { list.add(tuple("a"+i, bag(tuple("o", "b")))); } for (int i = 10; i < 2000; i++) { list.add(tuple("a"+i, bag(tuple("o", "b"), tuple("o", "b"), tuple("o", "b"), tuple("o", "b")))); } for (int i = 20; i < 3000; i++) { list.add(tuple("a"+i, bag(tuple("o", "b"), tuple("o", null), tuple(null, "b"), tuple(null, null)))); } for (int i = 30; i < 4000; i++) { list.add(tuple("a"+i, null)); } Collections.shuffle((List<?>)list); data.set("in", "a:chararray, b:{t:(c:chararray, d:chararray)}", list ); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.deleteFile(out); pigServer.registerQuery("Store A into '"+out+"' using "+ParquetStorer.class.getName()+"();"); if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", pigServer.executeBatch().get(0).getException()); } { pigServer.registerQuery("B = LOAD '"+out+"' USING "+ParquetLoader.class.getName()+"();"); pigServer.registerQuery("Store B into 'out' using mock.Storage();"); if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", pigServer.executeBatch().get(0).getException()); } List<Tuple> result = data.get("out"); assertEquals(list, result); final Schema schema = data.getSchema("out"); assertEquals("{a:chararray, b:{t:(c:chararray, d:chararray)}}".replaceAll(" ", ""), schema.toString().replaceAll(" ", "")); } { pigServer.registerQuery("C = LOAD '"+out+"' USING "+ParquetLoader.class.getName()+"('a:chararray');"); pigServer.registerQuery("Store C into 'out2' using mock.Storage();"); if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", 
pigServer.executeBatch().get(0).getException()); } final Function<Tuple,Object> grabFirstColumn = new Function<Tuple,Object>() { @Override public Object apply(Tuple input) { try { return input.get(0); } catch (ExecException e) { throw new RuntimeException(e); } } }; List<Tuple> result2 = data.get("out2"); // Functional programming!! Object[] result2int = Collections2.transform(result2, grabFirstColumn).toArray(); Object[] input2int = Collections2.transform(list, grabFirstColumn).toArray(); assertArrayEquals(input2int, result2int); } }