Java Code Examples for org.apache.pig.data.Tuple#get()
The following examples show how to use
org.apache.pig.data.Tuple#get().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Reverse.java From spork with Apache License 2.0 | 6 votes |
/**
 * Reverses the string stored in the first field of the tuple.
 *
 * <p>The reversal goes through {@link StringBuilder#reverse()} so that surrogate
 * pairs (code points outside the Basic Multilingual Plane) are kept together
 * instead of being split into two invalid halves by a plain char swap.
 *
 * @param input tuple whose first field is cast to {@code String} and reversed
 * @return the reversed string; the same instance when it is empty; {@code null}
 *         when the tuple is null or empty, the field is null, or reading the
 *         field raises an {@code ExecException}
 * @throws IOException declared by the signature; an {@code ExecException} raised
 *         while reading the field is reported through {@code warn} instead
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        String str = (String) input.get(0);
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return str;
        }
        // Surrogate-pair aware, unlike swapping individual chars.
        return new StringBuilder(str).reverse().toString();
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }
}
Example 2
Source File: PigStreamingUDF.java From spork with Apache License 2.0 | 6 votes |
/**
 * Serializes the fields of a tuple into the shared {@code out} buffer.
 *
 * <p>The buffer is reset first. Each field is written with
 * {@code StorageUtil.putField}, a parameter delimiter is written between
 * consecutive fields, and the record-end bytes are appended last.
 *
 * @param t tuple to serialize; {@code null} is treated as a tuple with no fields
 * @return the shared {@code out} buffer holding the serialized record
 * @throws IOException propagated from reading the tuple or writing to the buffer
 */
@Override
public WritableByteArray serializeToBytes(Tuple t) throws IOException {
    out.reset();

    final int fieldCount = (t == null) ? 0 : t.size();
    final int lastIndex = fieldCount - 1;

    for (int idx = 0; idx < fieldCount; idx++) {
        Object current = t.get(idx);
        StorageUtil.putField(out, current, DELIMS, true);
        // No delimiter after the final field.
        if (idx != lastIndex) {
            out.write(DELIMS.getParamDelim());
        }
    }

    byte[] recordEnd = DELIMS.getRecordEnd();
    out.write(recordEnd, 0, recordEnd.length);
    return out;
}
Example 3
Source File: DateExtractor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Re-formats the date string held in the first field of the tuple.
 *
 * <p>The value is parsed with {@code incomingDateFormat} and rendered with
 * {@code outgoingDateFormat}.
 *
 * @param input tuple whose first field is the date string
 * @return the re-formatted date, or {@code null} when the tuple is null or empty,
 *         the field is null, or the string cannot be parsed (a message is then
 *         printed to standard error)
 * @throws IOException wrapping any other exception raised while processing the row
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0 || input.get(0) == null) {
        return null;
    }

    // Declared outside the try so the parse-failure message can include it.
    String rawDate = "";
    try {
        rawDate = (String) input.get(0);
        Date parsed = incomingDateFormat.parse(rawDate);
        return outgoingDateFormat.format(parsed);
    } catch (ParseException pe) {
        System.err.println("piggybank.evaluation.util.apachelogparser.DateExtractor: unable to parse date "+rawDate);
        return null;
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
Example 4
Source File: AliasableEvalFunc.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Looks up a string field of the tuple by alias.
 *
 * @param tuple tuple to read from
 * @param alias alias resolved to a position via {@code getPosition}
 * @param defaultValue value returned when the field is null
 * @return the field cast to {@code String}, or {@code defaultValue} when it is null
 * @throws ExecException propagated from reading the tuple
 * @throws FieldNotFound when the alias is unknown or its position lies beyond the
 *         end of the tuple
 */
public String getString(Tuple tuple, String alias, String defaultValue) throws ExecException {
    final Integer position = getPosition(alias);

    if (position == null) {
        throw new FieldNotFound("Attempt to reference unknown alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
    }
    if (position >= tuple.size()) {
        throw new FieldNotFound("Attempt to reference outside of tuple for alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
    }

    final String value = (String) tuple.get(position);
    return (value == null) ? defaultValue : value;
}
Example 5
Source File: FloatVAR.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Computes the variance from the partial results held in the bag at field 0.
 *
 * <p>The bag is reduced with {@code combine} to a tuple of (sum, sum of squares,
 * count); the result is {@code avg(x^2) - avg(x)^2}.
 *
 * @param input tuple whose first field is the bag of partial results
 * @return the variance; {@code null} when the combined sum is null or the
 *         combined count is not positive
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        Double sumSquare = (Double) combined.get(1);
        if (sum == null) {
            return null;
        }
        Long count = (Long) combined.get(2);

        Double variance = null;
        if (count > 0) {
            // Primitive arithmetic instead of the deprecated new Double(...) boxing.
            double avg = sum / count;
            double avgSquare = sumSquare / count;
            variance = avgSquare - avg * avg;
        }
        return variance;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing variance in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
Example 6
Source File: JsFunction.java From spork with Apache License 2.0 | 5 votes |
@Override public Object exec(Tuple tuple) throws IOException { Schema inputSchema = this.getInputSchema(); if (LOG.isDebugEnabled()) { LOG.debug( "CALL " + stringify(outputSchema) + " " + functionName + " " + stringify(inputSchema)); } // UDF always take a tuple: unwrapping when not necessary to simplify UDFs if (inputSchema.size() == 1 && inputSchema.getField(0).type == DataType.TUPLE) { inputSchema = inputSchema.getField(0).schema; } Scriptable params = pigTupleToJS(tuple, inputSchema, 0); Object[] passedParams = new Object[inputSchema.size()]; for (int j = 0; j < passedParams.length; j++) { passedParams[j] = params.get(inputSchema.getField(j).alias, params); } Object result = jsScriptEngine.jsCall(functionName, passedParams); if (LOG.isDebugEnabled()) { LOG.debug( "call "+functionName+"("+Arrays.toString(passedParams)+") => "+toString(result)); } // We wrap the result with an object in the following cases: // 1. Result is not an object type. // 2. OutputSchema is a tuple type. if (!(result instanceof NativeObject) || outputSchema.getField(0).type == DataType.TUPLE) { Scriptable wrapper = jsScriptEngine.jsNewObject(); wrapper.put(outputSchema.getField(0).alias, wrapper, result); result = wrapper; } Tuple evalTuple = jsToPigTuple((Scriptable)result, outputSchema, 0); Object eval = outputSchema.size() == 1 ? evalTuple.get(0) : evalTuple; LOG.debug(eval); return eval; }
Example 7
Source File: BitwiseORAggregation.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * ORs the numeric value found at {@code inputColumnIndex} into {@code bitmap}.
 *
 * <p>A null value is skipped. Otherwise {@code nonNullValueSeen} is set and the
 * value, converted to {@code long}, is OR-ed into the running bitmap.
 *
 * @param input tuple holding the value to aggregate
 * @throws IOException propagated from reading the tuple
 */
@Override
public void aggregate(Tuple input) throws IOException {
    Object obj = input.get(inputColumnIndex);
    if (obj == null) {
        return;
    }

    nonNullValueSeen = true;
    // Reuse the value already fetched instead of reading the column a second time.
    bitmap |= ((Number) obj).longValue();
}
Example 8
Source File: Over.java From spork with Apache License 2.0 | 5 votes |
/**
 * Advances the row counter and returns it as a fraction of the bag size.
 *
 * @param input tuple whose first field is a bag; the bag's iterator is cast to
 *        {@code OverBag.OverBagIterator}
 * @return the incremented {@code currentRow} divided by the number of tuples
 *         exposed by the iterator
 * @throws IOException propagated from reading the tuple
 */
@Override
public Double exec(Tuple input) throws IOException {
    DataBag window = (DataBag) input.get(0);
    OverBag.OverBagIterator cursor = (OverBag.OverBagIterator) window.iterator();

    // The counter is bumped before the size is read, as a pre-increment would do.
    currentRow++;
    double rowNumber = (double) currentRow;
    double total = (double) cursor.tuples.size();
    return rowNumber / total;
}
Example 9
Source File: Concat.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Concatenates the string forms of the first {@code nargs} fields of the tuple.
 *
 * <p>Null fields are skipped. A {@link StringBuilder} is used so the cost stays
 * linear in the output length rather than copying the partial result on every
 * append.
 *
 * @param tuple tuple supplying the fields to join
 * @return the concatenated string; empty when every field is null or
 *         {@code nargs} is zero
 * @throws ExecException propagated from reading the tuple
 */
@Override
public Object eval(Tuple tuple) throws ExecException {
    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < nargs; i++) {
        Object field = tuple.get(i);
        if (field != null) {
            joined.append(field.toString());
        }
    }
    return joined.toString();
}
Example 10
Source File: POPartitionRearrangeTez.java From spork with Apache License 2.0 | 5 votes |
protected DataBag constructPROutput(List<Result> resLst, Tuple value) throws ExecException{ Tuple t = super.constructLROutput(resLst, null, value); //Construct key Object key = t.get(1); // Construct an output bag and feed in the tuples DataBag opBag = mBagFactory.newDefaultBag(); // Put the index, key, and value in a tuple and return // first -> min, second -> max Pair <Integer, Integer> indexes = reducerMap.get(key); // For non skewed keys, we set the partition index to be -1 if (indexes == null) { indexes = new Pair <Integer, Integer>(-1,0); } for (Integer reducerIdx = indexes.first, cnt = 0; cnt <= indexes.second; reducerIdx++, cnt++) { if (reducerIdx >= totalReducers) { reducerIdx = 0; } Tuple opTuple = mTupleFactory.newTuple(4); opTuple.set(0, t.get(0)); // set the partition index opTuple.set(1, reducerIdx.intValue()); opTuple.set(2, key); opTuple.set(3, t.get(2)); opBag.add(opTuple); } return opBag; }
Example 11
Source File: DaysBetween.java From spork with Apache License 2.0 | 5 votes |
@Override public Long exec(Tuple input) throws IOException { if (input == null || input.size() < 2 || input.get(0) == null || input.get(1) == null) { return null; } DateTime startDate = (DateTime) input.get(0); DateTime endDate = (DateTime) input.get(1); // Larger date first // Subtraction may overflow return (startDate.getMillis() - endDate.getMillis()) / 86400000L; }
Example 12
Source File: TestEvalPipeline2.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that a LIMIT applied after a descending ORDER BY yields exactly ten
 * rows in non-increasing order.
 *
 * <p>Forty pseudo-random integers (seed 1, range 0-99) are written to a
 * temporary file, loaded, sorted descending with parallel 2, and limited to 10.
 */
@Test
public void testLimitAfterSortDesc() throws Exception {
    final int rowCount = 40;

    File tmpFile = Util.createTempFileDelOnExit("test", "txt");
    PrintStream writer = new PrintStream(new FileOutputStream(tmpFile));
    Random generator = new Random(1);
    for (int row = 0; row < rowCount; row++) {
        int value = generator.nextInt(100);
        writer.println(value);
    }
    writer.close();

    pigServer.registerQuery("A = LOAD '"
            + Util.generateURI(tmpFile.toString(), pigServer.getPigContext())
            + "' AS (num:int);");
    pigServer.registerQuery("B = order A by num desc parallel 2;");
    pigServer.registerQuery("C = limit B 10;");

    Iterator<Tuple> results = pigServer.openIterator("C");
    if (!results.hasNext()) {
        Assert.fail("No output found");
    }

    int seen = 0;
    int previous = Integer.MAX_VALUE;
    while (results.hasNext()) {
        Tuple row = results.next();
        int current = (Integer) row.get(0);
        // Descending order: each value must not exceed the one before it.
        Assert.assertTrue(current <= previous);
        previous = current;
        ++seen;
    }
    Assert.assertEquals(10, seen);
}
Example 13
Source File: FloatSignum.java From spork with Apache License 2.0 | 5 votes |
/**
 * Java-level API: computes the signum of a single float value.
 *
 * @param input expects a single numeric value in the first field
 * @return the signum function of the argument, or {@code null} when the tuple is
 *         null or empty or its first field is null
 * @throws IOException wrapping any exception raised while processing the row
 */
@Override
public Float exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0 || input.get(0) == null) {
        return null;
    }

    try {
        float value = (Float) input.get(0);
        return Math.signum(value);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
Example 14
Source File: LongAvg.java From spork with Apache License 2.0 | 5 votes |
/**
 * Counts the tuples in the bag at field 0 whose first field is non-null.
 *
 * <p>Null tuples and tuples with no fields are not counted. The iterator is
 * typed as {@code Iterator<Tuple>} rather than a raw {@code Iterator}, which
 * removes the unchecked cast on every element.
 *
 * @param input tuple whose first field is the bag to inspect
 * @return number of tuples that are non-null, non-empty, and have a non-null
 *         first field
 * @throws ExecException propagated from tuple access
 */
static protected long count(Tuple input) throws ExecException {
    DataBag values = (DataBag) input.get(0);
    long cnt = 0;
    for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
        Tuple t = it.next();
        if (t != null && t.size() > 0 && t.get(0) != null) {
            cnt++;
        }
    }
    return cnt;
}
Example 15
Source File: TestFilterUDF.java From spork with Apache License 2.0 | 5 votes |
@Override public Boolean exec(Tuple input) throws IOException { try { int col = (Integer)input.get(0); if (col > 10) return true; } catch (ExecException e) { // TODO Auto-generated catch block e.printStackTrace(); } return false; }
Example 16
Source File: BagTests.java From datafu with Apache License 2.0 | 5 votes |
@Test public void bagJoinFullOuterTest() throws Exception { PigTest test = createPigTestFromString(bagJoinFullOuterTest); writeLinesToFile("input", "1\t{(K1,A1),(K2,B1),(K3,C1)}\t{(K1,A2),(K2,B2),(K2,B22)}\t{(K1,A3),(K3,C3),(K4,D3)}"); try { test.runScript(); } catch (Exception e) { e.printStackTrace(); throw e; } List<Tuple> tuples = getLinesForAlias(test, "data2"); assertEquals(tuples.size(), 1); Tuple tuple = tuples.get(0); DataBag joined1 = (DataBag)tuple.get(1); DataBag joined2 = (DataBag)tuple.get(2); String joined1Schema = "{(bag1::k: chararray,bag1::v: chararray,bag2::k: chararray,bag2::v: chararray,bag3::k3: chararray,bag3::v3: chararray)}"; String joined2Schema = "{(bag1::k: chararray,bag1::v: chararray,bag3::k3: chararray,bag3::v3: chararray,bag2::k: chararray,bag2::v: chararray)}"; String expectedJoined1 = "{(K1,A1,K1,A2,K1,A3),(K2,B1,K2,B2,,),(K2,B1,K2,B22,,),(K3,C1,,,K3,C3),(,,,,K4,D3)}"; String expectedJoined2 = "{(K1,A1,K1,A3,K1,A2),(K2,B1,,,K2,B2),(K2,B1,,,K2,B22),(K3,C1,K3,C3,,),(,,K4,D3,,)}"; // compare sorted bags because there is no guarantee on the order assertEquals(getSortedBag(joined1).toString(),getSortedBag(expectedJoined1, joined1Schema).toString()); assertEquals(getSortedBag(joined2).toString(),getSortedBag(expectedJoined2, joined2Schema).toString()); }
Example 17
Source File: CubeDimensions.java From Cubert with Apache License 2.0 | 5 votes |
public DimensionKey extractDimensionKey(Tuple tuple) throws ExecException { int[] array = key.getArray(); for (int i = 0; i < inputIndex.length; i++) { Object dim = tuple.get(inputIndex[i]); if (dim == null) throw new RuntimeException("Dimension is null for tuple " + tuple); switch (dimensionTypes[i]) { case BOOLEAN: array[dimensionOffsets[i]] = ((Boolean) dim) ? 1 : 0; break; case INT: array[dimensionOffsets[i]] = ((Number) dim).intValue(); break; case LONG: long val = ((Number) dim).longValue(); array[dimensionOffsets[i]] = (int) (val >> 32); // upper 32 bits array[dimensionOffsets[i] + 1] = (int) val; // lower 32 bits break; case STRING: CodeDictionary dict = dictionaries[i]; int code = dict.getCodeForKey((String) dim); if (code == -1) code = dict.addKey((String) dim); array[dimensionOffsets[i]] = code; break; default: throw new RuntimeException("Type of dimension is not INT, LONG or STRING for tuple " + tuple + " at col " + i); } } return key; }
Example 18
Source File: GetMilliSecond.java From spork with Apache License 2.0 | 5 votes |
/**
 * Extracts the millisecond-of-second component of a date-time value.
 *
 * @param input tuple whose first field is cast to {@code DateTime}
 * @return the value of {@code getMillisOfSecond()}, or {@code null} when the
 *         tuple is null or empty or its first field is null
 * @throws IOException propagated from reading the tuple
 */
@Override
public Integer exec(Tuple input) throws IOException {
    boolean hasValue = input != null && input.size() >= 1 && input.get(0) != null;
    if (!hasValue) {
        return null;
    }

    DateTime timestamp = (DateTime) input.get(0);
    return timestamp.getMillisOfSecond();
}
Example 19
Source File: IntVAR.java From datafu with Apache License 2.0 | 4 votes |
static protected Tuple combine(DataBag values) throws ExecException{ long sum = 0; long sumSquare = 0; long totalCount = 0; // combine is called from Intermediate and Final // In either case, Initial would have been called // before and would have sent in valid tuples // Hence we don't need to check if incoming bag // is empty Tuple output = mTupleFactory.newTuple(3); boolean sawNonNull = false; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple t = it.next(); Long i = (Long)t.get(0); Long iSquare = (Long)t.get(1); Long count = (Long)t.get(2); // we count nulls in var as contributing 0 // a departure from SQL for performance of // COUNT() which implemented by just inspecting // size of the bag if(i == null) { i = (long)0; iSquare = (long)0; } else { sawNonNull = true; } sum += i; sumSquare += iSquare; totalCount += count; } if(sawNonNull) { output.set(0, new Long(sum)); output.set(1, new Long(sumSquare)); } else { output.set(0, null); output.set(1, null); } output.set(2, Long.valueOf(totalCount)); return output; }
Example 20
Source File: MedianFlattenOperator.java From Cubert with Apache License 2.0 | 4 votes |
private Tuple tupleFlatten(Tuple inTuple) throws ExecException { int outputSchemaSize = schema.getNumColumns(); Tuple outTuple = TupleFactory.getInstance().newTuple(outputSchemaSize); // last column of inTuple is bag for (int i = 0; i < inTuple.size() - 1; i++) { outTuple.set(i, inTuple.get(i)); } // outputSchemaSize is 1 greater than inputSchemaSize, and tuple zero indexed, so // -2 DataBag bag = (DataBag) inTuple.get(outputSchemaSize - 2); Iterator<Tuple> bagIterator = bag.iterator(); Tuple firstTuple = bagIterator.next(); if (firstTuple == null) { throw new RuntimeException("Bag should not be empty"); } outTuple.set(outputSchemaSize - 2, firstTuple.get(0)); outTuple.set(outputSchemaSize - 1, firstTuple.get(1)); if (bagIterator.hasNext()) { Tuple secondTuple = bagIterator.next(); secondOutput = TupleFactory.getInstance().newTuple(outputSchemaSize); // last column of inTuple is bag for (int i = 0; i < inTuple.size() - 1; i++) { secondOutput.set(i, inTuple.get(i)); } secondOutput.set(outputSchemaSize - 2, secondTuple.get(0)); secondOutput.set(outputSchemaSize - 1, secondTuple.get(1)); } return outTuple; /* * Tuple outTuple = TupleFactory.getInstance().newTuple(4); * * outTuple.set(0, inTuple.get(0)); outTuple.set(1, inTuple.get(1)); * * DataBag bag = (DataBag) inTuple.get(2); Iterator<Tuple> bagIterator = * bag.iterator(); Tuple firstTuple = bagIterator.next(); * * if (firstTuple == null) { throw new RuntimeException * ("Bag should not be empty"); } * * outTuple.set(2, firstTuple.get(0)); outTuple.set(3, firstTuple.get(1)); * * // case of two outputs if (bagIterator.hasNext()) { Tuple secondTuple = * bagIterator.next(); secondOutput = TupleFactory.getInstance().newTuple(4); * secondOutput.set(0, inTuple.get(0)); secondOutput.set(1, inTuple.get(1)); * secondOutput.set(2, secondTuple.get(0)); secondOutput.set(3, * secondTuple.get(1)); } * * return outTuple; */ }