Java Code Examples for org.apache.pig.data.Tuple#getAll()
The following examples show how to use
org.apache.pig.data.Tuple#getAll().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestForEachStar.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code foreach ... generate *} returns the loaded row unchanged when the
 * load statement declares no schema.
 *
 * <p>The expected row is derived from the chararray constant {@code ('one','two')}, with every
 * field re-wrapped as a {@link DataByteArray} (presumably because schema-less loads yield
 * bytearray fields; confirm against the loader).
 */
@Test
public void testForeachStarSchemaUnkown() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
            " l1 = load '" + INPUT_FILE + "' ;"
            + "f1 = foreach l1 generate * ;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("f1", System.out);

    Iterator<Tuple> actualRows = pig.openIterator("f1");

    // Convert each chararray field of the constant into its bytearray form.
    Tuple charArrayFields = (Tuple) Util.getPigConstant("('one','two')");
    Tuple expectedRow = TupleFactory.getInstance().newTuple();
    Iterator<Object> fieldIter = charArrayFields.getAll().iterator();
    while (fieldIter.hasNext()) {
        String text = fieldIter.next().toString();
        expectedRow.append(new DataByteArray(text));
    }

    assertTrue("has output", actualRows.hasNext());
    assertEquals(expectedRow, actualRows.next());
}
Example 2
Source File: TupleUtils.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Copies each field of {@code src} into the same position of {@code dest}.
 *
 * <p>Field values are passed through as-is (no cloning). Positions are written with
 * {@code dest.set}, so {@code dest} presumably needs to be at least as large as
 * {@code src} already; verify against callers.
 *
 * @param src  tuple whose fields are read
 * @param dest tuple whose fields are overwritten
 * @throws ExecException if {@code dest.set} fails
 */
public static void copy(Tuple src, Tuple dest) throws ExecException {
    int position = 0;
    for (Object field : src.getAll()) {
        dest.set(position, field);
        position++;
    }
}
Example 3
Source File: TupleUtils.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Copies each field of {@code src} into the same position of {@code dest}, passing every
 * value through {@code getFieldDeepCopy} first.
 *
 * @param src  tuple whose fields are read
 * @param dest tuple whose fields are overwritten via {@code set}
 * @throws ExecException if copying a field or writing to {@code dest} fails
 */
public static void deepCopy(Tuple src, Tuple dest) throws ExecException {
    int position = 0;
    for (Object field : src.getAll()) {
        Object duplicate = getFieldDeepCopy(field);
        dest.set(position, duplicate);
        position++;
    }
}
Example 4
Source File: TupleUtils.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Walks the fields of {@code src} in order and hands each one, together with its index and
 * {@code dest}, to {@code deepFieldCopyWithReuse}.
 *
 * @param src  tuple whose fields are read
 * @param dest tuple that receives the copied fields
 * @throws ExecException if copying a field fails
 */
public static void deepCopyWithReuse(Tuple src, Tuple dest) throws ExecException {
    int position = 0;
    for (Object field : src.getAll()) {
        deepFieldCopyWithReuse(position, field, dest);
        position++;
    }
}
Example 5
Source File: PigJrubyLibrary.java From spork with Apache License 2.0 | 5 votes |
/**
 * Converts a Pig {@link Tuple} into a Ruby array, converting each field with the
 * {@code pigToRuby(Ruby, Object)} overload.
 *
 * @param  ruby          the Ruby runtime to create objects in
 * @param  object        the tuple to convert
 * @return               a new {@link RubyArray} holding the converted fields in tuple order
 * @throws ExecException if the tuple contains a field that could not be converted
 */
public static RubyArray pigToRuby(Ruby ruby, Tuple object) throws ExecException {
    RubyArray converted = ruby.newArray();
    for (Object field : object.getAll()) {
        converted.add(pigToRuby(ruby, field));
    }
    return converted;
}
Example 6
Source File: JythonUtils.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link PyTuple} from a Pig tuple by converting each field with
 * {@code pigToPython}.
 *
 * @param tuple the Pig tuple to convert
 * @return a Python tuple with one converted element per field, in the same order
 */
public static PyTuple pigTupleToPyTuple(Tuple tuple) {
    PyObject[] elements = new PyObject[tuple.size()];
    int next = 0;
    for (Object field : tuple.getAll()) {
        elements[next] = pigToPython(field);
        next++;
    }
    return new PyTuple(elements);
}
Example 7
Source File: CubeDimensions.java From spork with Apache License 2.0 | 5 votes |
/**
 * Replaces every {@code null} field of {@code tuple} with the {@code unknown} marker,
 * modifying the tuple in place.
 *
 * @param tuple the tuple to rewrite
 * @throws ExecException if a field cannot be set
 */
public static void convertNullToUnknown(Tuple tuple) throws ExecException {
    int position = 0;
    for (Object field : tuple.getAll()) {
        if (field == null) {
            // set() replaces the value at an existing position while the loop is running.
            tuple.set(position, unknown);
        }
        position++;
    }
}
Example 8
Source File: TestHelper.java From spork with Apache License 2.0 | 5 votes |
/**
 * Returns a new tuple in which every field of {@code t} has been whitespace-trimmed.
 *
 * <p>Each field is cast to {@link DataByteArray}, so a field of any other type raises
 * {@link ClassCastException}.
 *
 * @param t tuple whose fields are all {@code DataByteArray} values
 * @return a freshly created tuple holding the trimmed values; {@code t} is left untouched
 */
private static Tuple trimTuple(Tuple t) {
    Tuple trimmed = TupleFactory.getInstance().newTuple();
    for (Object field : t.getAll()) {
        DataByteArray original = (DataByteArray) field;
        String text = original.toString().trim();
        // NOTE(review): getBytes() without a charset uses the platform default encoding.
        trimmed.append(new DataByteArray(text.getBytes()));
    }
    return trimmed;
}
Example 9
Source File: TestMultiQueryLocal.java From spork with Apache License 2.0 | 5 votes |
/**
 * Writes a copy of {@code f} with {@code suffix} appended as one extra trailing field.
 *
 * <p>The fields of {@code f} are appended to a new tuple, so {@code f} itself is not modified.
 *
 * @param f the tuple to store
 * @throws IOException if the writer fails, or if the write is interrupted (the
 *         {@link InterruptedException} is attached as the cause)
 */
@Override
public void putNext(Tuple f) throws IOException {
    try {
        Tuple t = TupleFactory.getInstance().newTuple();
        for (Object obj : f.getAll()) {
            t.append(obj);
        }
        t.append(suffix);
        writer.write(null, t);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the thread's interrupt flag; restore it so
        // code further up the stack can still observe that an interrupt was requested.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
Example 10
Source File: TestJoin.java From spork with Apache License 2.0 | 4 votes |
@Test public void testJoinSchema2() throws Exception { // test join where one load does not have schema ExecType execType = ExecType.LOCAL; setUp(execType ); String[] input1 = { "1\t2", "2\t3", "3\t4" }; String[] input2 = { "1\thello", "4\tbye", }; String firstInput = createInputFile(execType, "a.txt", input1); String secondInput = createInputFile(execType, "b.txt", input2); Tuple expectedResultCharArray = (Tuple)Util.getPigConstant("('1','2','1','hello','1','2','1','hello')"); Tuple expectedResult = TupleFactory.getInstance().newTuple(); for(Object field : expectedResultCharArray.getAll()){ expectedResult.append(new DataByteArray(field.toString())); } // with schema String script = "a = load '"+ Util.encodeEscape(firstInput) +"' ; " + //re-using alias a for new operator below, doing this intentionally // because such use case has been seen "a = foreach a generate $0 as i, $1 as j ;" + "b = load '"+ Util.encodeEscape(secondInput) +"' as (k, l); " + "c = join a by $0, b by $0;" + "d = foreach c generate i,j,k,l,a::i as ai,a::j as aj,b::k as bk,b::l as bl;"; Util.registerMultiLineQuery(pigServer, script); Iterator<Tuple> it = pigServer.openIterator("d"); assertTrue(it.hasNext()); Tuple res = it.next(); assertEquals(expectedResult, res); assertFalse(it.hasNext()); deleteInputFile(execType, firstInput); deleteInputFile(execType, secondInput); }
Example 11
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testDistinctAggs1() throws Exception { // test the use of combiner for distinct aggs: String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "distinctAggs1Input.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " x = distinct a.age;" + " y = distinct a.gpa;" + " z = distinct a;" + " generate group, COUNT(x), SUM(x.age), SUM(y.gpa), SUM(a.age), " + " SUM(a.gpa), COUNT(z.age), COUNT(z), SUM(z.age);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertTrue(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", 3L, 57L, 5.2, 75L, 9.4, 3L, 3L, 57L }); results.put("pig2", new Object[] { "pig2", 1L, 24L, 7.8, 48L, 7.8, 2L, 2L, 48L }); results.put("pig5", new Object[] { "pig5", 1L, 45L, 2.4, 45L, 2.4, 1L, 1L, 45L }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "distinctAggs1Input.txt"); pigServer.shutdown(); }
Example 12
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testDistinctNoCombiner() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // is a distinct() as the leaf String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " z = distinct a;" + " generate group, z, SUM(a.age), SUM(a.gpa);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 }); results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 }); results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { if (i == 1) { // ignore the second field which is a bag // for comparison here continue; } assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "distinctNoCombinerInput.txt"); pigServer.shutdown(); }
Example 13
Source File: TestCombiner.java From spork with Apache License 2.0 | 4 votes |
@Test public void testForEachNoCombiner() throws Exception { // test that combiner is NOT invoked when // one of the elements in the foreach generate // has a foreach in the plan without a distinct agg String input[] = { "pig1\t18\t2.1", "pig2\t24\t3.3", "pig5\t45\t2.4", "pig1\t18\t2.1", "pig1\t19\t2.1", "pig2\t24\t4.5", "pig1\t20\t3.1" }; Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input); PigServer pigServer = new PigServer(cluster.getExecType(), properties); pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);"); pigServer.registerQuery("b = group a by name;"); pigServer.registerQuery("c = foreach b {" + " z = a.age;" + " generate group, z, SUM(a.age), SUM(a.gpa);};"); // make sure there is a combine plan in the explain output ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("c", ps); assertFalse(baos.toString().matches("(?si).*combine plan.*")); HashMap<String, Object[]> results = new HashMap<String, Object[]>(); results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 }); results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 }); results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 }); Iterator<Tuple> it = pigServer.openIterator("c"); while (it.hasNext()) { Tuple t = it.next(); List<Object> fields = t.getAll(); Object[] expected = results.get(fields.get(0)); int i = 0; for (Object field : fields) { if (i == 1) { // ignore the second field which is a bag // for comparison here continue; } assertEquals(expected[i++], field); } } Util.deleteFile(cluster, "forEachNoCombinerInput.txt"); pigServer.shutdown(); }
Example 14
Source File: TestEvalPipeline.java From spork with Apache License 2.0 | 4 votes |
@Test public void testCogroupAfterDistinct() throws Exception { String[] input1 = { "abc", "abc", "def", "def", "def", "abc", "def", "ghi" }; String[] input2 = { "ghi 4", "rst 12344", "uvw 1", "xyz 4141" }; Util.createInputFile(cluster, "table1", input1); Util.createInputFile(cluster, "table2", input2); pigServer.registerQuery("nonuniqtable1 = LOAD 'table1' AS (f1:chararray);"); pigServer.registerQuery("table1 = DISTINCT nonuniqtable1;"); pigServer.registerQuery("table2 = LOAD 'table2' AS (f1:chararray, f2:int);"); pigServer.registerQuery("temp = COGROUP table1 BY f1 INNER, table2 BY f1;"); Iterator<Tuple> it = pigServer.openIterator("temp"); // results should be: // (abc,{(abc)},{}) // (def,{(def)},{}) // (ghi,{(ghi)},{(ghi,4)}) HashMap<String, Tuple> results = new HashMap<String, Tuple>(); Object[] row = new Object[] { "abc", Util.createBagOfOneColumn(new String[] { "abc"}), mBf.newDefaultBag() }; results.put("abc", Util.createTuple(row)); row = new Object[] { "def", Util.createBagOfOneColumn(new String[] { "def"}), mBf.newDefaultBag() }; results.put("def", Util.createTuple(row)); Object[] thirdColContents = new Object[] { "ghi", 4 }; Tuple t = Util.createTuple(thirdColContents); row = new Object[] { "ghi", Util.createBagOfOneColumn(new String[] { "ghi"}), Util.createBag(new Tuple[] { t })}; results.put("ghi", Util.createTuple(row)); while(it.hasNext()) { Tuple tup = it.next(); List<Object> fields = tup.getAll(); Tuple expected = results.get((String)fields.get(0)); int i = 0; for (Object field : fields) { Assert.assertEquals(expected.get(i++), field); } } Util.deleteFile(cluster, "table1"); Util.deleteFile(cluster, "table2"); }
Example 15
Source File: Stitch.java From spork with Apache License 2.0 | 4 votes |
@Override public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() == 0) return null; List<DataBag> bags = new ArrayList<DataBag>(input.size()); for (int i = 0; i < input.size(); i++) { Object o = input.get(i); try { bags.add((DataBag)o); } catch (ClassCastException cce) { int errCode = 2107; // TODO not sure this is the right one String msg = "Stitch expected bags as input but argument " + i + " is a " + DataType.findTypeName(o); throw new ExecException(msg, errCode, PigException.INPUT); } } if (bags.size() == 1) return bags.get(0); DataBag output = BagFactory.getInstance().newDefaultBag(); List<Iterator<Tuple>> iters = new ArrayList<Iterator<Tuple>>(bags.size()); for (DataBag bag : bags) { iters.add(bag.iterator()); } while (iters.get(0).hasNext()) { Tuple outTuple = TupleFactory.getInstance().newTuple(); for (Iterator<Tuple> iter : iters) { if (iter.hasNext()) { Tuple t = iter.next(); List<Object> fields = t.getAll(); for (Object field : fields) { outTuple.append(field); } } } output.add(outTuple); } return output; }