org.apache.pig.data.Tuple Java Examples
The following examples show how to use
org.apache.pig.data.Tuple.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ROUND_TO.java From spork with Apache License 2.0 | 6 votes |
/**
 * Java-level API: rounds a number to a given number of digits.
 *
 * @param input tuple whose first field is converted to a double (the value to round), whose
 *              second field is converted to an integer (the digits to keep) and whose optional
 *              third field is converted to an integer and passed to
 *              {@code RoundingMode.valueOf}; {@code RoundingMode.HALF_EVEN} is used when the
 *              tuple has fewer than three fields
 * @return the rounded value, or {@code null} when the tuple is null, has fewer than two fields,
 *         the value to round is null, or a {@code NumberFormatException} is raised
 * @throws IOException wrapping any other exception raised while processing the tuple
 */
@Override
public Double exec(Tuple input) throws IOException {
    if (input == null || input.size() < 2) {
        return null;
    }
    try {
        final Double value = DataType.toDouble(input.get(0));
        final Integer scale = DataType.toInteger(input.get(1));
        final RoundingMode roundingMode;
        if (input.size() >= 3) {
            roundingMode = RoundingMode.valueOf(DataType.toInteger(input.get(2)));
        } else {
            roundingMode = RoundingMode.HALF_EVEN;
        }
        // The null check deliberately comes after the scale and mode have been read.
        if (value == null) {
            return null;
        }
        return BigDecimal.valueOf(value).setScale(scale, roundingMode).doubleValue();
    } catch (NumberFormatException nfe) {
        System.err.println("Failed to process input; error - " + nfe.getMessage());
        return null;
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
Example #2
Source File: TestTuple.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code toDelimitedString(",")} joins fields of mixed types with the delimiter
 * and renders a null field as an empty string.
 */
@Test
public void testToDelimitedString() {
    Tuple t = mTupleFactory.newTuple();
    // valueOf replaces the deprecated boxing constructors; the boxed values are identical.
    t.append(Integer.valueOf(1));
    t.append(Long.valueOf(2L));
    t.append(Float.valueOf(1.1f));
    t.append(Double.valueOf(2.3));
    t.append("howdy howdy howdy");
    t.append(null);
    t.append("woah there");
    t.append(Double.valueOf(2000000.3000000001));
    t.append(Float.valueOf(1000000000.1000001f));
    t.append(Long.valueOf(2001010101L));
    t.append(Integer.valueOf(100010101));
    try {
        String expected =
            "1,2,1.1,2.3,howdy howdy howdy,,woah there,2000000.3,1.0E9,2001010101,100010101";
        assertEquals(expected, t.toDelimitedString(","));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #3
Source File: StorageUtil.java From spork with Apache License 2.0 | 6 votes |
/** * Transform bytes from a byte array up to the specified length to a <code>Tuple</code> * * @param buf the byte array * @param length number of bytes to consume from the byte array * @param fieldDel the field delimiter * @return tuple constructed from the bytes */ public static Tuple bytesToTuple(byte[] buf, int offset, int length, byte fieldDel) { int start = offset; ArrayList<Object> protoTuple = new ArrayList<Object>(); for (int i = offset; i < length; i++) { if (buf[i] == fieldDel) { readField(protoTuple, buf, start, i); start = i + 1; } } // pick up the last field if (start <= length) { readField(protoTuple, buf, start, length); } return TupleFactory.getInstance().newTupleNoCopy(protoTuple); }
Example #4
Source File: BagCount.java From spork with Apache License 2.0 | 6 votes |
/**
 * Counts the tuples in the bag held in the first field of the given tuple.
 *
 * @param tuple tuple whose first field is cast to a {@code DataBag}
 * @return the number of tuples in the bag, or 0 when the first field is null
 * @throws IOException if the first field cannot be read
 */
public Integer exec(Tuple tuple) throws IOException {
    DataBag databag = (DataBag) tuple.get(0);
    if (databag == null) {
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        return Integer.valueOf(0);
    }
    int count = 0;
    // Walks the bag rather than asking for its size, as the original did.
    for (Tuple ignored : databag) {
        count++;
    }
    return Integer.valueOf(count);
}
Example #5
Source File: TestRank1.java From spork with Apache License 2.0 | 6 votes |
/**
 * Runs {@code rank A by f1..f3} over the mock 'test01' relation and checks the ranked rows
 * stored under 'result' against the expected set.
 */
@Test
public void testRank07RankBy() throws IOException {
    final String loadStatement =
        "A = LOAD 'test01' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);";
    final String rankStatement = "C = rank A by f1..f3;";
    final String storeStatement = "store C into 'result' using mock.Storage();";
    Util.registerMultiLineQuery(pigServer, loadStatement + rankStatement + storeStatement);

    // Each row is (rank, f1, f2, f3). Two rows appear twice in this listing.
    Set<Tuple> expected = ImmutableSet.of(
        tf.newTuple(ImmutableList.of(1L, "A", 1, "N")),
        tf.newTuple(ImmutableList.of(2L, "B", 2, "N")),
        tf.newTuple(ImmutableList.of(3L, "C", 3, "M")),
        tf.newTuple(ImmutableList.of(4L, "D", 4, "P")),
        tf.newTuple(ImmutableList.of(5L, "E", 4, "Q")),
        tf.newTuple(ImmutableList.of(5L, "E", 4, "Q")),
        tf.newTuple(ImmutableList.of(7L, "F", 7, "Q")),
        tf.newTuple(ImmutableList.of(8L, "F", 8, "Q")),
        tf.newTuple(ImmutableList.of(8L, "F", 8, "Q")),
        tf.newTuple(ImmutableList.of(10L, "F", 8, "T")),
        tf.newTuple(ImmutableList.of(11L, "G", 10, "V")));

    verifyExpected(data.get("result"), expected);
}
Example #6
Source File: ReverseEnumerate.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Copies every tuple of the input bag into a new bag, appending a descending index to each
 * copy. The first tuple receives {@code inputBag.size() - 1 + start} and the index drops by
 * one per tuple.
 *
 * @param inputBag bag whose tuples are copied and numbered
 * @return a new bag holding the numbered copies
 * @throws IOException declared by the signature
 */
public DataBag call(DataBag inputBag) throws IOException {
    final DataBag result = BagFactory.getInstance().newDefaultBag();
    long index = inputBag.size() - 1 + start;
    long sinceSpill = 0;
    for (Tuple source : inputBag) {
        final Tuple numbered = TupleFactory.getInstance().newTuple(source.getAll());
        numbered.append(index);
        result.add(numbered);
        // The counter is tested before it is incremented, so the very first tuple also
        // triggers a spill; after that a spill happens every 1000000 tuples.
        if (sinceSpill % 1000000 == 0) {
            result.spill();
            sinceSpill = 0;
        }
        index--;
        sinceSpill++;
    }
    return result;
}
Example #7
Source File: DateTimeMax.java From spork with Apache License 2.0 | 6 votes |
@Override public void accumulate(Tuple b) throws IOException { try { DateTime curMax = max(b); if (curMax == null) { return; } // check curMax if (intermediateMax == null || curMax.isAfter(intermediateMax)) { intermediateMax = curMax; } } catch (ExecException ee) { throw ee; } catch (Exception e) { int errCode = 2106; String msg = "Error while computing max in " + this.getClass().getSimpleName(); throw new ExecException(msg, errCode, PigException.BUG, e); } }
Example #8
Source File: PerfTest.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Loads the first {@code colsToLoad} chararray columns (a0, a1, ...) from {@code out} with
 * {@code ParquetLoader}, counts the rows, asserts the count equals {@code ROW_COUNT} and
 * appends the elapsed time to {@code results}.
 *
 * @param out location passed to the LOAD statement
 * @param colsToLoad number of columns to include in the loader schema
 * @throws RuntimeException if the count query yields no tuple
 */
private static void load(String out, int colsToLoad) throws ExecException, IOException {
    final long startMillis = System.currentTimeMillis();

    final StringBuilder schema = new StringBuilder("a0: chararray");
    for (int col = 1; col < colsToLoad; col++) {
        schema.append(", a").append(col).append(": chararray");
    }

    final PigServer pigServer = new PigServer(ExecType.LOCAL);
    final String loadQuery =
        "B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "('" + schema + "');";
    pigServer.registerQuery(loadQuery);
    pigServer.registerQuery("C = FOREACH (GROUP B ALL) GENERATE COUNT(B);");

    final Iterator<Tuple> rows = pigServer.openIterator("C");
    if (!rows.hasNext()) {
        throw new RuntimeException("Job failed: no tuple to read");
    }
    final Long count = (Long) rows.next().get(0);
    assertEquals(ROW_COUNT, count.longValue());

    final long endMillis = System.currentTimeMillis();
    results.append((endMillis - startMillis) + " ms to read " + colsToLoad + " columns\n");
}
Example #9
Source File: RAD_Test.java From Surus with Apache License 2.0 | 6 votes |
private Boolean approximateCompareBags(DataBag inputBag1, DataBag inputBag2) throws ExecException { // Hardcode Acceptable Error double errorLimit = 0.0000001; Iterator<Tuple> iter1 = inputBag1.iterator(); Iterator<Tuple> iter2 = inputBag2.iterator(); while (iter1.hasNext()) { Tuple tuple1 = iter1.next(); Tuple tuple2 = iter2.next(); // Check error if (Math.abs((Double) tuple1.get(0) - (Double) tuple2.get(0)) > errorLimit) return false; // TODO: Add unit test for differenced case //if (Math.abs((Double) tuple1.get(1) - (Double) tuple2.get(1)) > errorLimit) return false; if (Math.abs((Double) tuple1.get(2) - (Double) tuple2.get(2)) > errorLimit) return false; if (Math.abs((Double) tuple1.get(3) - (Double) tuple2.get(3)) > errorLimit) return false; if (Math.abs((Double) tuple1.get(4) - (Double) tuple2.get(4)) > errorLimit) return false; } return true; }
Example #10
Source File: DoubleCopySign.java From spork with Apache License 2.0 | 6 votes |
/**
 * Java-level API.
 *
 * @param input tuple whose first two fields are cast to {@code Double}
 * @return the first floating-point argument with the sign of the second floating-point
 *         argument, or {@code null} when the tuple is null, has fewer than two fields, or
 *         either of the first two fields is null
 * @throws IOException wrapping any exception raised while reading or casting the fields
 */
@Override
public Double exec(Tuple input) throws IOException {
    final boolean unusable = input == null
            || input.size() < 2
            || input.get(0) == null
            || input.get(1) == null;
    if (unusable) {
        return null;
    }
    try {
        final double magnitude = (Double) input.get(0);
        final double sign = (Double) input.get(1);
        return Math.copySign(magnitude, sign);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
Example #11
Source File: TestOrcStorage.java From spork with Apache License 2.0 | 6 votes |
@Test public void testLoadStoreMoreDataType() throws Exception { pigServer.registerQuery("A = load '" + basedir + "orc-file-11-format.orc'" + " using OrcStorage();" ); pigServer.registerQuery("B = foreach A generate boolean1..double1, '' as bytes1, string1..;"); pigServer.store("B", OUTPUT4, "OrcStorage"); // A bug in ORC InputFormat does not allow empty file in input directory fs.delete(new Path(OUTPUT4, "_SUCCESS"), true); pigServer.registerQuery("A = load '" + OUTPUT4 + "' using OrcStorage();" ); Iterator<Tuple> iter = pigServer.openIterator("A"); Tuple t = iter.next(); assertTrue(t.toString().startsWith("(false,1,1024,65536,9223372036854775807,1.0,-15.0," + ",hi,({(1,bye),(2,sigh)}),{(3,good),(4,bad)},[],")); assertTrue(t.get(12).toString().matches("2000-03-12T15:00:00.000.*")); assertTrue(t.toString().endsWith(",12345678.6547456)")); }
Example #12
Source File: TestProject.java From spork with Apache License 2.0 | 6 votes |
@Test public void testNullTupleCols() throws Exception { String inputFileName = "TestProject-testNullTupleCols-input.txt"; String input[] = { "1\t(hello,world)", "2\t(good)", "3" }; Util.createLocalInputFile(inputFileName, input); // PigStorage will return null as the value for the tuple field in the // second record since it does not comply with the schema and in the // third record since the field is absent String query = "a = load '" + inputFileName + "' as (i:int, " + "t:tuple(s1:chararray, s2:chararray));" + "b = foreach a generate t.s1, t.s2;"; PigServer ps = new PigServer(ExecType.LOCAL); Util.registerMultiLineQuery(ps, query); Iterator<Tuple> it = ps.openIterator("b"); Tuple[] expectedResults = new Tuple[] { (Tuple)Util.getPigConstant("('hello', 'world')"), (Tuple)Util.getPigConstant("(null, null)"), (Tuple)Util.getPigConstant("(null, null)") }; int i = 0; while (it.hasNext()) { assertEquals(expectedResults[i++], it.next()); } }
Example #13
Source File: AlgebraicBigDecimalMathBase.java From spork with Apache License 2.0 | 6 votes |
/**
 * Folds the value computed from the given tuple into {@code intermediateVal}, seeding the
 * accumulator from {@code getSeed} the first time a non-null value is seen.
 *
 * @param b tuple handed to {@code doTupleWork}
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public void accumulate(Tuple b) throws IOException {
    try {
        final BigDecimal operand = doTupleWork(b, opProvider);
        if (operand != null) {
            if (intermediateVal == null) {
                intermediateVal = getSeed(opProvider.getOp());
            }
            intermediateVal = doWork(intermediateVal, operand, opProvider.getOp());
        }
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        throw new ExecException("Error executing function on BigDecimal", 2106,
                PigException.BUG, e);
    }
}
Example #14
Source File: TestEvalPipeline.java From spork with Apache License 2.0 | 6 votes |
/**
 * Cogroups two small inputs on their first column, flattens both sides and checks that every
 * flattened row carries the same key on both sides and that four rows are produced.
 */
@Test
public void testJoin() throws Exception {
    File f1 = Util.createFile(new String[]{"a:1", "b:1", "a:1"});
    File f2 = Util.createFile(new String[]{"b", "b", "a"});
    pigServer.registerQuery("a = load '" + Util.generateURI(f1.toString(), pigContext)
        + "' using " + PigStorage.class.getName() + "(':');");
    pigServer.registerQuery("b = load '" + Util.generateURI(f2.toString(), pigContext) + "';");
    pigServer.registerQuery("c = cogroup a by $0, b by $0;");
    pigServer.registerQuery("d = foreach c generate flatten($1),flatten($2);");
    Iterator<Tuple> iter = pigServer.openIterator("d");
    int count = 0;
    while (iter.hasNext()) {
        Tuple t = iter.next();
        // assertEquals reports both keys on failure, unlike assertTrue(a.equals(b)).
        Assert.assertEquals(t.get(0).toString(), t.get(2).toString());
        count++;
    }
    // Expected value goes first so a failure message reads correctly.
    Assert.assertEquals(4, count);
}
Example #15
Source File: ThriftColumnFamilyTest.java From stratio-cassandra with Apache License 2.0 | 6 votes |
@Test public void testCassandraStorageCounterCF() throws IOException, ClassNotFoundException, TException, TimedOutException, NotFoundException, InvalidRequestException, NoSuchFieldException, UnavailableException, IllegalAccessException, InstantiationException, AuthenticationException, AuthorizationException { pig.registerQuery("rows = LOAD 'cassandra://thriftKs/SomeApp?" + defaultParameters + "' USING CassandraStorage();"); //Test counter column family support pig.registerQuery("CC = load 'cassandra://thriftKs/CC?" + defaultParameters + "' using CassandraStorage();"); pig.registerQuery("total_hits = foreach CC generate key, SUM(columns.value);"); //(chuck,4) Iterator<Tuple> it = pig.openIterator("total_hits"); if (it.hasNext()) { Tuple t = it.next(); Assert.assertEquals(t.get(0), "chuck"); Assert.assertEquals(t.get(1), 4l); } }
Example #16
Source File: ReservoirSample.java From datafu with Apache License 2.0 | 6 votes |
@Override public DataBag exec(Tuple input) throws IOException { getReservoir().clear(); DataBag bagOfSamples = (DataBag) input.get(0); for (Tuple innerTuple : bagOfSamples) { DataBag samples = (DataBag) innerTuple.get(0); for (Tuple sample : samples) { // use the same score as previously generated getReservoir().consider(ScoredTuple.fromIntermediateTuple(sample)); } } DataBag output = BagFactory.getInstance().newDefaultBag(); for (ScoredTuple scoredTuple : getReservoir()) { // output the original tuple output.add(scoredTuple.getTuple()); } return output; }
Example #17
Source File: LitePackager.java From spork with Apache License 2.0 | 6 votes |
@Override public Tuple illustratorMarkup(Object in, Object out, int eqClassIndex) { if (illustrator != null) { ExampleTuple tOut = new ExampleTuple((Tuple) out); LineageTracer lineageTracer = illustrator.getLineage(); lineageTracer.insert(tOut); if (illustrator.getEquivalenceClasses() == null) { LinkedList<IdentityHashSet<Tuple>> equivalenceClasses = new LinkedList<IdentityHashSet<Tuple>>(); for (int i = 0; i < numInputs; ++i) { IdentityHashSet<Tuple> equivalenceClass = new IdentityHashSet<Tuple>(); equivalenceClasses.add(equivalenceClass); } illustrator.setEquivalenceClasses(equivalenceClasses, parent); } illustrator.getEquivalenceClasses().get(eqClassIndex).add(tOut); tOut.synthetic = false; // not expect this to be really used illustrator.addData((Tuple) tOut); return tOut; } else return (Tuple) out; }
Example #18
Source File: AlgebraicLongMathBase.java From spork with Apache License 2.0 | 6 votes |
/**
 * Folds the value computed from the given tuple into {@code intermediateVal}; a null value
 * is ignored and the accumulator is seeded from {@code getSeed} on first use.
 *
 * @param b tuple handed to {@code doTupleWork}
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public void accumulate(Tuple b) throws IOException {
    try {
        final Long next = doTupleWork(b, opProvider);
        final boolean hasValue = next != null;
        if (hasValue && intermediateVal == null) {
            intermediateVal = getSeed(opProvider.getOp());
        }
        if (hasValue) {
            intermediateVal = doWork(intermediateVal, next, opProvider.getOp());
        }
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        final int errCode = 2106;
        final String msg = "Error executing function on Longs";
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
Example #19
Source File: AlgebraicFloatMathBase.java From spork with Apache License 2.0 | 5 votes |
/**
 * Wraps the result of {@code doTupleWork} for the given input in a new tuple.
 *
 * @param input tuple handed to {@code doTupleWork}
 * @return a tuple built by {@code tfact} from the computed value
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        return tfact.newTuple(doTupleWork(input, this));
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        final String msg = "Error executing function on Floats";
        throw new ExecException(msg, 2106, PigException.BUG, e);
    }
}
Example #20
Source File: TestPOSort.java From spork with Apache License 2.0 | 5 votes |
public void poSortAscInt( DataBag input) throws ExecException { List<PhysicalPlan> sortPlans = new LinkedList<PhysicalPlan>(); POProject pr1 = new POProject(new OperatorKey("", r.nextLong()), -1, 1); pr1.setResultType(DataType.INTEGER); PhysicalPlan expPlan = new PhysicalPlan(); expPlan.add(pr1); sortPlans.add(expPlan); List<Boolean> mAscCols = new LinkedList<Boolean>(); mAscCols.add(true); PORead read = new PORead(new OperatorKey("", r.nextLong()), input); List<PhysicalOperator> inputs = new LinkedList<PhysicalOperator>(); inputs.add(read); POSort sort = new POSort(new OperatorKey("", r.nextLong()), -1, inputs, sortPlans, mAscCols, null); Tuple t = null; Result res1 = sort.getNextTuple(); // System.out.println(res1.result); Result res2 = sort.getNextTuple(); while (res2.returnStatus != POStatus.STATUS_EOP) { Object i1 = ((Tuple) res1.result).get(1); Object i2 = ((Tuple) res2.result).get(1); int i = DataType.compare(i1, i2); assertEquals(true, (i <= 0)); // System.out.println(res2.result); res1 = res2; res2 = sort.getNextTuple(); } }
Example #21
Source File: LongAvg.java From spork with Apache License 2.0 | 5 votes |
static protected Tuple combine(DataBag values) throws ExecException { long sum = 0; long count = 0; // combine is called from Intermediate and Final // In either case, Initial would have been called // before and would have sent in valid tuples // Hence we don't need to check if incoming bag // is empty Tuple output = mTupleFactory.newTuple(2); boolean sawNonNull = false; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple t = it.next(); Long l = (Long)t.get(0); // we count nulls in avg as contributing 0 // a departure from SQL for performance of // COUNT() which implemented by just inspecting // size of the bag if(l == null) { l = 0L; } else { sawNonNull = true; } sum += l; count += (Long)t.get(1); } if(sawNonNull) { output.set(0, Long.valueOf(sum)); } else { output.set(0, null); } output.set(1, Long.valueOf(count)); return output; }
Example #22
Source File: IntAvg.java From spork with Apache License 2.0 | 5 votes |
/**
 * Combines the bag held in the first field of the input through {@code combine}.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the tuple produced by {@code combine}
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other exception is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        return combine((DataBag) input.get(0));
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        final String msg =
            "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, 2106, PigException.BUG, e);
    }
}
Example #23
Source File: SerializedTupleStore.java From Cubert with Apache License 2.0 | 5 votes |
public SerializedTupleStore(BlockSchema schema,String[] comparatorKeys) throws IOException { this.schema = schema; this.comparatorKeys = comparatorKeys; this.createOffsetList = (comparatorKeys != null); this.pbaos = new PagedByteArrayOutputStream(CHUNK_SIZE); if (PhaseContext.getConf().getBoolean(CubertStrings.USE_COMPACT_SERIALIZATION, false) && schema.isFlatSchema()) { serializer = new CompactSerializer<Tuple>(schema); writablesDeserializer = new CompactDeserializer<Tuple>(schema); deserializer = new CompactDeserializer<Tuple>(schema); } else { serializer = new DefaultTupleSerializer(); deserializer = new DefaultTupleDeserializer(); writablesDeserializer = deserializer; } serializer.open(pbaos); if (createOffsetList) { startOffsetList = new ArrayList<Integer>(); keyIndices = new int[comparatorKeys.length]; for (int i = 0; i < keyIndices.length; i++) keyIndices[i] = schema.getIndex(comparatorKeys[i]); } reader = new SerializedTupleStoreReader(pbaos.getPagedByteArray(), true); }
Example #24
Source File: ToDate2ARGS.java From spork with Apache License 2.0 | 5 votes |
public DateTime exec(Tuple input) throws IOException { if (input == null || input.size() < 1 || input.get(0) == null) { return null; } String dtStr = DataType.toString(input.get(0)); //DateTimeZone dtz = extractDateTimeZone(dtStr); //The timezone in the customized format is not predictable DateTimeFormatter dtf = DateTimeFormat.forPattern(DataType .toString(input.get(1))); //if (dtz == null) { return dtf.parseDateTime(dtStr); //} else { // return dtf.withZone(dtz).parseDateTime(dtStr); //} }
Example #25
Source File: JsFunction.java From spork with Apache License 2.0 | 5 votes |
/**
 * Converts a JavaScript array into a Pig bag by converting each element to a tuple.
 *
 * @param array scriptable whose ids are cast to {@code Integer} and used as element indices
 * @param schema bag schema; when it consists of a single TUPLE field, that field's schema is
 *               used for the elements instead
 * @param depth nesting depth passed to the debug helpers and, incremented, to the element
 *              conversion
 * @return a default bag holding the converted tuples
 */
private DataBag jsToPigBag(Scriptable array, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Bag", array, schema);

    final boolean wrapsSingleTuple =
        schema.size() == 1 && schema.getField(0).type == DataType.TUPLE;
    final Schema elementSchema = wrapsSingleTuple ? schema.getField(0).schema : schema;

    final List<Tuple> tuples = new ArrayList<Tuple>();
    final Object[] ids = array.getIds();
    for (int k = 0; k < ids.length; k++) {
        final int index = ((Integer) ids[k]).intValue();
        final Scriptable element = (Scriptable) array.get(index, null);
        tuples.add(jsToPigTuple(element, elementSchema, depth + 1));
    }

    final DataBag result = BagFactory.getInstance().newDefaultBag(tuples);
    debugReturn(depth, result);
    return result;
}
Example #26
Source File: IndexedStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Creates the task's work file together with its index file and returns a record writer
 * over them.
 *
 * @param context task attempt context supplying the configuration and the work file location
 * @return an {@code IndexedStorageRecordWriter} over the newly created output stream
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 */
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
        TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSystem fs = FileSystem.get(context.getConfiguration());
    final Path workFile = this.getDefaultWorkFile(context, "");
    // The second argument to create is false, i.e. overwrite is not requested.
    final FSDataOutputStream out = fs.create(workFile, false);

    final IndexManager index = new IndexManager(offsetsToIndexKeys);
    index.createIndexFile(fs, workFile);

    return new IndexedStorageRecordWriter(out, this.fieldDelimiter, index);
}
Example #27
Source File: LSHPigTest.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the sparse L1 script over seeded random vectors and checks that each query gets a
 * neighbor row with a count of at least 3, then verifies the points under L1 distance.
 */
@Test
public void testL1UDFSparse() throws Exception {
    setMemorySettings();

    // Fixed seed keeps the generated vectors reproducible.
    final RandomGenerator generator = new JDKRandomGenerator();
    generator.setSeed(0);
    final RandomData randomData = new RandomDataImpl(generator);

    final int n = 1000;
    final List<RealVector> vectors = LSHTest.getVectors(randomData, 1000, n);
    final PigTest test = createPigTestFromString(l1SparseTest);
    writeLinesToFile("input", getSparseLines(vectors));

    final List<RealVector> queries = LSHTest.getVectors(randomData, 1000, 10);
    writeLinesToFile("queries", getSparseLines(queries));

    test.runScript();
    final List<Tuple> neighbors = this.getLinesForAlias(test, "NEIGHBOR_CNT");
    Assert.assertEquals(queries.size(), neighbors.size());
    for (long neighborCount : getCounts(neighbors)) {
        Assert.assertTrue(neighborCount >= 3);
    }

    final Distance l1Distance = new Distance() {
        @Override
        public double distance(RealVector v1, RealVector v2) {
            return L1.distance(v1, v2);
        }
    };
    verifyPoints(neighbors, l1Distance, 1000);
}
Example #28
Source File: PigMapReduceCounter.java From spork with Apache License 2.0 | 5 votes |
/**
 * Writes the tuple to the context with a null key and, when a status reporter is available,
 * increments by one the counter whose group name is {@code PIG_MAP_RANK_NAME} followed by
 * the job id and whose name is {@code taskID}.
 *
 * @param context context the tuple is written to and the job id is read from
 * @param tuple tuple to emit
 */
@Override
public void collect(Context context, Tuple tuple) throws InterruptedException, IOException {
    context.write(null, tuple);

    final PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter == null) {
        return;
    }
    final String counterGroup =
        JobControlCompiler.PIG_MAP_RANK_NAME + context.getJobID().toString();
    reporter.incrCounter(counterGroup, taskID, 1);
}
Example #29
Source File: TestOrderBy2.java From spork with Apache License 2.0 | 5 votes |
/**
 * Generates a sample dataset of 256 two-field tuples: the row index and the row index
 * modulo 20, both formatted as seven-digit zero-padded strings.
 *
 * @return the generated tuples in index order
 */
private List<Tuple> genDataSetFile3() throws IOException {
    final int dataLength = 256;
    final List<Tuple> rows = Lists.newArrayList();
    final DecimalFormat sevenDigits = new DecimalFormat("0000000");

    int row = 0;
    while (row < dataLength) {
        final String id = sevenDigits.format(row);
        final String bucket = sevenDigits.format(row % 20);
        rows.add(tuple(id, bucket));
        row++;
    }
    return rows;
}
Example #30
Source File: POPartialAgg.java From spork with Apache License 2.0 | 5 votes |
/**
 * For each entry in {@code fromMap}, feeds the list of tuples into the aggregator funcs and
 * adds the result to {@code toMap}. Entries are removed from {@code fromMap} as we go.
 *
 * @param fromMap source map, emptied by this call
 * @param toMap map receiving one aggregated tuple per processed key
 * @param numEntriesInTarget running entry count before this call
 * @return {@code numEntriesInTarget} increased by the number of entries processed
 * @throws ExecException
 */
private int aggregate(Map<Object, List<Tuple>> fromMap, Map<Object, List<Tuple>> toMap, int numEntriesInTarget) throws ExecException {
    int total = numEntriesInTarget;
    for (Iterator<Map.Entry<Object, List<Tuple>>> cursor = fromMap.entrySet().iterator();
            cursor.hasNext();) {
        final Map.Entry<Object, List<Tuple>> entry = cursor.next();
        final Object key = entry.getKey();
        final Tuple valueTuple = createValueTuple(key, entry.getValue());
        final Result aggregated = getOutput(key, valueTuple);
        // Drop the raw entry before its result is recorded in the target map.
        cursor.remove();
        addKeyValToMap(toMap, key, getAggResultTuple(aggregated.result));
        total++;
    }
    return total;
}