Java Code Examples for org.apache.pig.data.TupleFactory#getInstance()
The following examples show how to use
org.apache.pig.data.TupleFactory#getInstance().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BagTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Drives {@code TupleFromBag} through three accumulate/cleanup rounds and
 * checks {@code getValue()} after each one.
 *
 * Rounds one and two each accumulate three single-tuple bags, with the
 * second input field set to 0 and 1 respectively (presumably the index of
 * the tuple to pick -- confirm against TupleFromBag). The last round
 * accumulates an empty bag and expects the supplied default tuple back.
 */
@Test
public void tupleFromBagAccumulateTest() throws Exception {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final BagFactory bagFactory = BagFactory.getInstance();
    final TupleFromBag udf = new TupleFromBag();
    final Tuple fallback = tupleFactory.newTuple(1000);

    // Round 1: second field is 0.
    for (int arg : new int[] {4, 9, 16}) {
        udf.accumulate(tupleFactory.newTuple(Arrays.asList(
                bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(arg))), 0, fallback)));
    }
    assertEquals(udf.getValue(), tupleFactory.newTuple(4));
    udf.cleanup();

    // Round 2: second field is 1.
    for (int arg : new int[] {11, 17, 5}) {
        udf.accumulate(tupleFactory.newTuple(Arrays.asList(
                bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(arg))), 1, fallback)));
    }
    assertEquals(udf.getValue(), tupleFactory.newTuple(17));
    udf.cleanup();

    // Round 3: an empty bag should yield the default tuple.
    udf.accumulate(tupleFactory.newTuple(Arrays.asList(bagFactory.newDefaultBag(), 2, fallback)));
    assertEquals(udf.getValue(), fallback);
    udf.cleanup();
}
Example 2
Source File: BagTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Drives {@code FirstTupleFromBag} through three accumulate/cleanup rounds
 * and checks {@code getValue()} after each one: two rounds of three
 * single-tuple bags each, then one round with an empty bag for which the
 * supplied default tuple is expected.
 */
@Test
public void firstTupleFromBagAccumulateTest() throws Exception {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final BagFactory bagFactory = BagFactory.getInstance();
    final FirstTupleFromBag udf = new FirstTupleFromBag();
    final Tuple fallback = tupleFactory.newTuple(1000);

    // Round 1: expect the tuple from the first accumulated bag.
    for (int arg : new int[] {4, 9, 16}) {
        udf.accumulate(tupleFactory.newTuple(Arrays.asList(
                bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(arg))), fallback)));
    }
    assertEquals(udf.getValue(), tupleFactory.newTuple(4));
    udf.cleanup();

    // Round 2: same check after cleanup, with fresh inputs.
    for (int arg : new int[] {11, 17, 5}) {
        udf.accumulate(tupleFactory.newTuple(Arrays.asList(
                bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(arg))), fallback)));
    }
    assertEquals(udf.getValue(), tupleFactory.newTuple(11));
    udf.cleanup();

    // Round 3: an empty bag should yield the default tuple.
    udf.accumulate(tupleFactory.newTuple(Arrays.asList(bagFactory.newDefaultBag(), fallback)));
    assertEquals(udf.getValue(), fallback);
    udf.cleanup();
}
Example 3
Source File: TestProject.java From spork with Apache License 2.0 | 5 votes |
/**
 * Projects successive adjacent column pairs (col, col + 1) out of the random
 * input tuple until the operator reports end-of-processing, comparing each
 * result with a tuple built from the same two input fields. Afterwards it
 * switches the operator back to a single, non-overloaded column (8) and
 * checks that value too.
 */
@Test
public void testGetNextTupleMultipleProjections() throws IOException, ExecException {
    t = tRandom;
    proj.attachInput(t);
    proj.setOverloaded(true);

    // The same list instance is reused (and cleared) on every iteration.
    final ArrayList<Integer> projected = new ArrayList<Integer>();
    for (int col = 0; ; ++col) {
        projected.add(col);
        projected.add(col + 1);
        proj.setColumns(projected);
        res = proj.getNextTuple();
        if (res.returnStatus == POStatus.STATUS_EOP) {
            break;
        }

        TupleFactory factory = TupleFactory.getInstance();
        ArrayList<Object> pair = new ArrayList<Object>();
        pair.add(t.get(col));
        pair.add(t.get(col + 1));
        Tuple expected = factory.newTuple(pair);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expected, res.result);
        projected.clear();
    }

    // Single-column, non-overloaded projection.
    proj.attachInput(t);
    proj.setColumn(8);
    proj.setOverloaded(false);
    res = proj.getNextTuple();
    assertEquals(POStatus.STATUS_OK, res.returnStatus);
    assertEquals(t.get(8), res.result);
}
Example 4
Source File: TestEvalPipelineLocal.java From spork with Apache License 2.0 | 5 votes |
/**
 * Ignores its input and returns a new default bag holding three tuples,
 * each created from the string "a".
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final TupleFactory tuples = TupleFactory.getInstance();
    final DataBag result = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 3; i++) {
        result.add(tuples.newTuple("a"));
    }
    return result;
}
Example 5
Source File: TestPODistinct.java From spork with Apache License 2.0 | 5 votes |
@Test public void testPODistictWithInt() throws ExecException { input = BagFactory.getInstance().newDefaultBag(); TupleFactory tf = TupleFactory.getInstance(); for (int i = 0; i < MAX_SAMPLES; i++) { Tuple t = tf.newTuple(); t.append(r.nextInt(MAX_VALUE)); input.add(t); // System.out.println(t); } confirmDistinct(); }
Example 6
Source File: TestDataModel.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a sample tuple whose fields 0-9 hold, in order: a nested tuple, a
 * bag, a map, an Integer, a Long, a Float, a Double, a Boolean, a
 * DataByteArray and a String.
 *
 * The outer tuple is created with newTuple(11) but only fields 0-9 are set
 * here; presumably one trailing field is left at its initial value --
 * confirm against TupleFactory.newTuple(int).
 *
 * Boxed values are obtained through valueOf and plain string literals
 * instead of the deprecated wrapper constructors and redundant
 * {@code new String(...)} copies; the stored values are equal to the ones
 * the constructors produced.
 *
 * @return the populated tuple
 * @throws Exception if any of the tuple set calls fails
 */
private Tuple giveMeOneOfEach() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(11);

    // Nested tuple with an Integer and a Float.
    Tuple t2 = tf.newTuple(2);
    t2.set(0, Integer.valueOf(3));
    t2.set(1, Float.valueOf(3.0f));

    // Bag with two single-field tuples.
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    bag.add(tf.newTuple(Integer.valueOf(4)));
    bag.add(tf.newTuple("mary had a little lamb"));

    Map<String, Object> map = new LinkedHashMap<String, Object>(2);
    map.put("hello", "world");
    map.put("goodbye", "all");

    t1.set(0, t2);
    t1.set(1, bag);
    t1.set(2, map);
    t1.set(3, Integer.valueOf(42));
    t1.set(4, Long.valueOf(5000000000L));
    // Narrow from double exactly as the old Float(double) constructor did.
    t1.set(5, Float.valueOf((float) 3.141592654));
    t1.set(6, Double.valueOf(2.99792458e8));
    t1.set(7, Boolean.TRUE);
    t1.set(8, new DataByteArray("hello"));
    t1.set(9, "goodbye");
    return t1;
}
Example 7
Source File: TestEvalPipeline.java From spork with Apache License 2.0 | 5 votes |
/**
 * Ignores its input and returns a map with one entry per sample value:
 * "string", "int", "long", "float", "double", "dba" (a DataByteArray),
 * "map" (a nested two-entry map), "tuple" (a four-field tuple) and "bag"
 * (a default bag containing that tuple).
 *
 * Boxed values are obtained through valueOf and plain string literals
 * instead of the deprecated wrapper constructors and redundant
 * {@code new String(...)} copies; the stored values are equal to the ones
 * the constructors produced.
 *
 * @param input unused
 * @return the populated map
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
Example 8
Source File: POFRJoin.java From spork with Apache License 2.0 | 5 votes |
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp, List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes, FileSpec[] replFiles, int fragment, boolean isLeftOuter, Tuple nullTuple, Schema[] inputSchemas, Schema[] keySchemas) throws ExecException { super(k, rp, inp); phyPlanLists = ppLists; this.fragment = fragment; this.keyTypes = keyTypes; this.replFiles = replFiles; replicates = new TupleToMapKey[ppLists.size()]; LRs = new POLocalRearrange[ppLists.size()]; constExps = new ConstantExpression[ppLists.size()]; createJoinPlans(k); processingPlan = false; mTupleFactory = TupleFactory.getInstance(); List<Tuple> tupList = new ArrayList<Tuple>(); tupList.add(nullTuple); nullBag = new NonSpillableDataBag(tupList); this.isLeftOuterJoin = isLeftOuter; if (inputSchemas != null) { this.inputSchemas = inputSchemas; } else { this.inputSchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } if (keySchemas != null) { this.keySchemas = keySchemas; } else { this.keySchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } }
Example 9
Source File: TestDataModel.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads a 5x5 and a 2x2 int matrix into tuples through Util.loadNestTuple.
 * There are no assertions; the test only passes if no exception is thrown.
 */
@Test
public void testNestTuple() throws Exception {
    final TupleFactory factory = TupleFactory.getInstance();

    // Five rows of {1, 2, 3, 4, 5}.
    final int[][] wide = new int[5][];
    for (int row = 0; row < wide.length; row++) {
        wide[row] = new int[] { 1, 2, 3, 4, 5 };
    }
    // Two rows of {1, 2}.
    final int[][] narrow = new int[2][];
    for (int row = 0; row < narrow.length; row++) {
        narrow[row] = new int[] { 1, 2 };
    }

    Tuple wideTuple = Util.loadNestTuple(factory.newTuple(wide.length), wide);
    Tuple narrowTuple = factory.newTuple();
    narrowTuple = Util.loadNestTuple(factory.newTuple(narrow.length), narrow);
}
Example 10
Source File: TestBuiltInBagToTupleOrString.java From spork with Apache License 2.0 | 5 votes |
@Test(expected=org.apache.pig.backend.executionengine.ExecException.class) public void testInvalidInputToBagToTupleUDF() throws Exception { TupleFactory tf = TupleFactory.getInstance(); Tuple udfInput = tf.newTuple(1); // input contains tuple instead of bag udfInput.set(0, tf.newTuple()); BagToTuple udf = new BagToTuple(); // expecting an exception because the input if of type Tuple, not DataBag udf.exec(udfInput); }
Example 11
Source File: TestBuiltInBagToTupleOrString.java From spork with Apache License 2.0 | 5 votes |
/**
 * Runs BagToString with "_" as the second input field over a bag of
 * ("a", 5) and ("c", 6, ("d", 7)) and expects "a_5_c_6_(d,7)".
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
    final BagFactory bags = BagFactory.getInstance();
    final TupleFactory tuples = TupleFactory.getInstance();

    final Tuple flat = tuples.newTuple(2);
    flat.set(0, "a");
    flat.set(1, 5);

    final Tuple inner = tuples.newTuple(2);
    inner.set(0, "d");
    inner.set(1, 7);

    // Second bag element carries the nested tuple as its third field.
    final Tuple withNested = tuples.newTuple(3);
    withNested.set(0, "c");
    withNested.set(1, 6);
    withNested.set(2, inner);

    final DataBag bag = bags.newDefaultBag();
    bag.add(flat);
    bag.add(withNested);

    final BagToString bagToString = new BagToString();
    final Tuple args = tuples.newTuple(2);
    args.set(0, bag);
    args.set(1, "_");

    final String joined = bagToString.exec(args);
    assertEquals("a_5_c_6_(d,7)", joined);
}
Example 12
Source File: PigRelSqlUdfs.java From calcite with Apache License 2.0 | 5 votes |
/** * Implementation for PIG_BAG functions. Builds a Pig DataBag from * the corresponding input * * @param elements Input that contains a bag * @return Pig Tuple */ public static Tuple buildBag(Object... elements) { final TupleFactory tupleFactory = TupleFactory.getInstance(); final BagFactory bagFactory = BagFactory.getInstance(); // Convert each row into a Tuple List<Tuple> tupleList = new ArrayList<>(); if (elements != null) { // The first input contains a list of rows for the bag final List bag = (elements[0] instanceof List) ? (List) elements[0] : Collections.singletonList(elements[0]); for (Object row : bag) { tupleList.add(tupleFactory.newTuple(Arrays.asList(row))); } } // Then build a bag from the tuple list DataBag resultBag = bagFactory.newDefaultBag(tupleList); // The returned result is a new Tuple with the newly constructed DataBag // as the first item. List<Object> finalTuple = new ArrayList<>(); finalTuple.add(resultBag); if (elements != null) { // Add the remaining elements from the input for (int i = 1; i < elements.length; i++) { finalTuple.add(elements[i]); } } return tupleFactory.newTuple(finalTuple); }
Example 13
Source File: SampleEasyCubeAggregatorAsUDAF.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * {@inheritDoc}
 *
 * Writes the aggregator's sum and sum of squares into the output tuple at
 * {@code sumIndex} and {@code sumSqIndex}. The caller-supplied tuple is
 * reused when it is non-null; otherwise a new two-field tuple is created.
 *
 * @param reUsedOutput tuple to fill, or null to have one allocated
 * @param aggregationBuffer buffer that is cast to {@code myAggregator}
 * @return the filled tuple
 * @throws ExecException propagated from the tuple set calls
 */
@Override
public Object output(Object reUsedOutput, AggregationBuffer aggregationBuffer) throws ExecException {
    Tuple out = (Tuple) reUsedOutput;
    if (out == null) {
        out = TupleFactory.getInstance().newTuple(2);
    }

    final myAggregator aggregator = (myAggregator) aggregationBuffer;
    out.set(sumIndex, aggregator.getSum());
    out.set(sumSqIndex, aggregator.getSumSq());
    return out;
}
Example 14
Source File: ExampleEasyCubeAggregator.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Fills the output tuple with the aggregator's sum (at {@code sumIndex})
 * and sum of squares (at {@code sumSqIndex}). A null {@code reUsedOutput}
 * is replaced by a freshly created two-field tuple.
 */
@Override
public Object output(Object reUsedOutput, AggregationBuffer aggregationBuffer) throws ExecException {
    final Tuple target = (reUsedOutput != null)
            ? (Tuple) reUsedOutput
            : TupleFactory.getInstance().newTuple(2);

    final myAggregator source = (myAggregator) aggregationBuffer;
    target.set(sumIndex, source.getSum());
    target.set(sumSqIndex, source.getSumSq());
    return target;
}
Example 15
Source File: TestRegex.java From spork with Apache License 2.0 | 5 votes |
/**
 * Runs RegexExtractAll over three inputs that share one pattern: a matching
 * string (expects the two captured groups "this" and "match"), a
 * non-matching string (expects null) and a null string (expects null).
 */
@Test
public void testRegexExtractAll() throws IOException {
    String matchRegex = "^(.+)\\b\\s+is a\\s+\\b(.+)$";
    TupleFactory tupleFactory = TupleFactory.getInstance();

    Tuple t1 = tupleFactory.newTuple(2);
    t1.set(0, "this is a match");
    t1.set(1, matchRegex);

    Tuple t2 = tupleFactory.newTuple(2);
    t2.set(0, "no match");
    t2.set(1, matchRegex);

    Tuple t3 = tupleFactory.newTuple(2);
    t3.set(0, null);
    t3.set(1, matchRegex);

    RegexExtractAll func = new RegexExtractAll();

    Tuple r = func.exec(t1);
    // Expected value first, matching the other assertions in this method,
    // so a failure reports "expected 2" rather than the reverse.
    assertEquals(2, r.size());
    assertEquals("this", r.get(0));
    assertEquals("match", r.get(1));

    r = func.exec(t2);
    assertTrue(r == null);

    r = func.exec(t3);
    assertTrue(r == null);
}
Example 16
Source File: BagToTuple.java From spork with Apache License 2.0 | 4 votes |
@Override public Tuple exec(Tuple inputTuple) throws IOException { if (inputTuple.size() != 1) { throw new ExecException("Expecting 1 input, found " + inputTuple.size(), PigException.INPUT); } if (inputTuple.get(0) == null) { return null; } if (!(inputTuple.get(0) instanceof DataBag)) { throw new ExecException("Usage BagToTuple(DataBag)", PigException.INPUT); } DataBag inputBag = (DataBag) (inputTuple.get(0)); try { Tuple outputTuple = null; long outputTupleSize = getOuputTupleSize(inputBag); // TupleFactory.newTuple(int size) can only support up to Integer.MAX_VALUE if (outputTupleSize > Integer.MAX_VALUE) { throw new ExecException("Input bag is too large", 105, PigException.INPUT); } TupleFactory tupleFactory = TupleFactory.getInstance(); outputTuple = tupleFactory.newTuple((int) outputTupleSize); int fieldNum = 0; for (Tuple t : inputBag) { if (t != null) { for (int i = 0; i < t.size(); i++) { outputTuple.set(fieldNum++, t.get(i)); } } } return outputTuple; } catch (Exception e) { String msg = "Encourntered error while flattening a bag to tuple" + this.getClass().getSimpleName(); throw new ExecException(msg, PigException.BUG, e); } }
Example 17
Source File: LogicalPlanBuilder.java From spork with Apache License 2.0 | 4 votes |
/**
 * Wraps the given list of objects in a tuple obtained from the shared
 * TupleFactory instance.
 */
static Tuple buildTuple(List<Object> objList) {
    return TupleFactory.getInstance().newTuple(objList);
}
Example 18
Source File: POMergeJoin.java From spork with Apache License 2.0 | 4 votes |
/**
 * Deserialization hook: restores the default-serialized state, then
 * re-acquires the tuple factory, which is not read from the stream here.
 */
private void readObject(ObjectInputStream is)
        throws IOException, ClassNotFoundException, ExecException {
    // Default field restoration must happen before anything else.
    is.defaultReadObject();

    final TupleFactory factory = TupleFactory.getInstance();
    mTupleFactory = factory;
}
Example 19
Source File: PigPerformanceLoader.java From spork with Apache License 2.0 | 4 votes |
/**
 * Creates a loader that uses ^A (U+0001) as the field delimiter and
 * obtains the shared bag and tuple factories.
 */
public PigPerformanceLoader() {
    // Assume ^A as a delimiter. Written as an explicit escape so the
    // control character cannot be lost or hidden in the source text.
    super("\u0001");

    bagFactory = BagFactory.getInstance();
    tupleFactory = TupleFactory.getInstance();
}
Example 20
Source File: TestPOUserFunc.java From spork with Apache License 2.0 | 4 votes |
public void algebraicAVG( Integer[] input , Double initialExpectedSum, Long initialExpectedCount , Double intermedExpectedSum, Long intermedExpectedCount , Double expectedAvg ) throws IOException, ExecException { // generate data byte INIT = 0; byte INTERMED = 1; byte FINAL = 2; Tuple tup1 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input); Tuple tup2 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input); // System.out.println("Input = " + tup1); String funcSpec = AVG.class.getName() + "()"; POUserFunc po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec)); //************ Initial Calculations ****************** TupleFactory tf = TupleFactory.getInstance(); po.setAlgebraicFunction(INIT); po.attachInput(tup1); Tuple t = null; Result res = po.getNextTuple(); Tuple outputInitial1 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result : null; Tuple outputInitial2 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result : null; System.out.println(outputInitial1 + " " + outputInitial2); assertEquals(outputInitial1, outputInitial2); Double sum = (Double) outputInitial1.get(0); Long count = (Long) outputInitial1.get(1); assertEquals(initialExpectedSum, sum); assertEquals(initialExpectedCount, count); //************ Intermediate Data and Calculations ****************** DataBag bag = BagFactory.getInstance().newDefaultBag(); bag.add(outputInitial1); bag.add(outputInitial2); Tuple outputInitial = tf.newTuple(); outputInitial.append(bag); // Tuple outputIntermed = intermed.exec(outputInitial); po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec)); po.setAlgebraicFunction(INTERMED); po.attachInput(outputInitial); res = po.getNextTuple(); Tuple outputIntermed = (res.returnStatus == POStatus.STATUS_OK) ? 
(Tuple) res.result : null; sum = (Double) outputIntermed.get(0); count = (Long) outputIntermed.get(1); assertEquals(intermedExpectedSum, sum); assertEquals(intermedExpectedCount, count); System.out.println(outputIntermed); //************ Final Calculations ****************** po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec)); po.setAlgebraicFunction(FINAL); po.attachInput(outputInitial); res = po.getNextTuple(); Double output = (res.returnStatus == POStatus.STATUS_OK) ? (Double) res.result : null; // Double output = fin.exec(outputInitial); assertEquals((Double)expectedAvg, output); // System.out.println("output = " + output); }