Java Code Examples for org.apache.pig.data.Tuple#size()
The following examples show how to use
org.apache.pig.data.Tuple#size().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestXMLLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads the temp file built from {@code nestedTags} through
 * {@code XMLLoader('event')} and expects exactly three non-empty tuples.
 */
public void testXMLLoaderShouldSupportNestedTagWithSameName() throws Exception {
    String filename = TestHelper.createTempFile(nestedTags, "");
    PigServer pig = new PigServer(LOCAL);
    // Double every backslash so the path survives being embedded in the query text.
    filename = filename.replace("\\", "\\\\");

    StringBuilder query = new StringBuilder();
    query.append("A = LOAD '").append(filename);
    query.append("' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);");
    pig.registerQuery(query.toString());

    int nonEmptyTuples = 0;
    Iterator<?> rows = pig.openIterator("A");
    while (rows.hasNext()) {
        Tuple row = (Tuple) rows.next();
        if (row == null) {
            // A null tuple ends the scan early.
            break;
        }
        if (row.size() > 0) {
            nonEmptyTuples++;
        }
    }
    assertEquals(3, nonEmptyTuples);
}
Example 2
Source File: SetUnion.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Collects the tuples of every bag field of {@code input} into one distinct bag.
 *
 * @param input tuple whose fields must all be {@code DataBag} instances
 * @return a distinct bag holding every tuple found in the input bags
 * @throws IOException wrapping any exception raised while reading the input,
 *         including the RuntimeException thrown for a non-bag field
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    DataBag union = bagFactory.newDistinctBag();
    try {
        for (int idx = 0; idx < input.size(); idx++) {
            Object field = input.get(idx);
            if (field instanceof DataBag) {
                for (Tuple member : (DataBag) field) {
                    union.add(member);
                }
            } else {
                throw new RuntimeException("parameters must be databags");
            }
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
    return union;
}
Example 3
Source File: VespaDocumentOperation.java From vespa with Apache License 2.0 | 6 votes |
/**
 * Decides whether a tuple start should be written for the field {@code name}.
 *
 * @param tuple      the tuple being written
 * @param name       field name matched against the configured simple-array fields
 * @param properties configuration; may be null
 * @return {@code true} when the tuple has more than one field, when no
 *         properties are given, or when the SIMPLE_ARRAY_FIELDS property is
 *         unset; {@code false} when that property is "*" or its comma-separated
 *         list contains {@code name} (trimmed, case-insensitive); {@code true}
 *         otherwise
 */
private static boolean shouldWriteTupleStart(Tuple tuple, String name, Properties properties) {
    if (tuple.size() > 1 || properties == null) {
        return true;
    }

    String configured = properties.getProperty(SIMPLE_ARRAY_FIELDS);
    if (configured == null) {
        return true;
    }
    if ("*".equals(configured)) {
        return false;
    }

    boolean listed = false;
    for (String candidate : configured.split(",")) {
        if (candidate.trim().equalsIgnoreCase(name)) {
            listed = true;
            break;
        }
    }
    return !listed;
}
Example 4
Source File: BuildBloom.java From spork with Apache License 2.0 | 6 votes |
@Override public Tuple exec(Tuple input) throws IOException { if (input == null || input.size() == 0) return null; // Strip off the initial level of bag DataBag values = (DataBag)input.get(0); Iterator<Tuple> it = values.iterator(); Tuple t = it.next(); // If the input tuple has only one field, then we'll extract // that field and serialize it into a key. If it has multiple // fields, we'll serialize the whole tuple. byte[] b; if (t.size() == 1) b = DataType.toBytes(t.get(0)); else b = DataType.toBytes(t, DataType.TUPLE); Key k = new Key(b); filter = new BloomFilter(vSize, numHash, hType); filter.add(k); return TupleFactory.getInstance().newTuple(bloomOut()); }
Example 5
Source File: TransposeTupleToBag.java From datafu with Apache License 2.0 | 6 votes |
@Override public DataBag exec(Tuple input) throws IOException { // initialize a reverse mapping HashMap<Integer, String> positionToAlias = new HashMap<Integer, String>(); for (String alias : getFieldAliases().keySet()) { positionToAlias.put(getFieldAliases().get(alias), alias); } DataBag output = BagFactory.getInstance().newDefaultBag(); for (int i=0; i<input.size(); i++) { Tuple tuple = TupleFactory.getInstance().newTuple(); tuple.append(positionToAlias.get(i)); tuple.append(input.get(i)); output.add(tuple); } return output; }
Example 6
Source File: ROUND.java From spork with Apache License 2.0 | 6 votes |
/**
 * java level API
 *
 * @param input expects a single numeric value
 * @return the closest long to the argument; null when the input is null,
 *         empty, holds a null first field, or cannot be parsed as a number
 * @throws IOException when any other failure occurs while processing the row
 */
@Override
public Long exec(Tuple input) throws IOException {
    boolean nothingToRound = input == null || input.size() == 0 || input.get(0) == null;
    if (nothingToRound) {
        return null;
    }

    try {
        Double value = DataType.toDouble(input.get(0));
        long rounded = Math.round(value);
        return rounded;
    } catch (NumberFormatException nfe) {
        System.err.println("Failed to process input; error - " + nfe.getMessage());
        return null;
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
Example 7
Source File: DoubleNextup.java From spork with Apache License 2.0 | 5 votes |
/**
 * java level API
 *
 * @param input expects a single numeric value
 * @return the nextup value of the argument, or null when the input is null,
 *         empty, or holds a null first field
 * @throws IOException when the first field cannot be read as a Double
 */
public Double exec(Tuple input) throws IOException {
    if (input == null) {
        return null;
    }
    if (input.size() == 0 || input.get(0) == null) {
        return null;
    }

    final Double value;
    try {
        value = (Double) input.get(0);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
    return Math.nextUp(value);
}
Example 8
Source File: DIFF.java From spork with Apache License 2.0 | 5 votes |
/**
 * Compares a tuple with two fields. Emits any differences.
 *
 * <p>When the first field is a bag, both fields are handed to
 * {@code computeDiff}. Otherwise the two values are compared directly and,
 * if they differ, each is emitted as a single-field tuple. A null value is
 * treated as equal only to another null.
 *
 * @param input a tuple with exactly two fields.
 * @return a bag holding the differences; empty when the fields are equal
 * @throws IOException if there are not exactly two fields in a tuple
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input.size() != 2) {
        int errCode = 2107;
        String msg = "DIFF expected two inputs but received " + input.size() + " inputs.";
        throw new ExecException(msg, errCode, PigException.BUG);
    }

    DataBag output = mBagFactory.newDefaultBag();
    Object o1 = input.get(0);
    Object o2 = input.get(1);
    if (o1 instanceof DataBag) {
        computeDiff((DataBag) o1, (DataBag) o2, output);
    } else {
        // Null-safe comparison: calling o1.equals(o2) directly would throw a
        // NullPointerException whenever the first field is null.
        boolean same = (o1 == null) ? (o2 == null) : o1.equals(o2);
        if (!same) {
            output.add(mTupleFactory.newTuple(o1));
            output.add(mTupleFactory.newTuple(o2));
        }
    }
    return output;
}
Example 9
Source File: LTRIM.java From spork with Apache License 2.0 | 5 votes |
/**
 * Removes leading space characters from the first field of {@code input}.
 *
 * @param input tuple whose first field is read as a String
 * @return the string without leading spaces; null when the input is null or
 *         empty, the field is null, or the field cannot be read (a warning is
 *         logged in that last case)
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    final String str;
    try {
        str = (String) input.get(0);
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }

    // Null and empty values pass through untouched.
    if (str == null || str.isEmpty()) {
        return str;
    }
    return str.replaceFirst("^ +", "");
}
Example 10
Source File: FloatVAR.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Counts the tuples in the bag at field 0 of {@code input} that are non-null,
 * non-empty, and have a non-null first field.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return the number of qualifying tuples
 * @throws ExecException if a field cannot be read
 */
static protected long count(Tuple input) throws ExecException {
    DataBag values = (DataBag) input.get(0);
    long qualifying = 0;
    for (Tuple entry : values) {
        if (entry == null || entry.size() == 0 || entry.get(0) == null) {
            continue;
        }
        qualifying++;
    }
    return qualifying;
}
Example 11
Source File: ExtractHour.java From spork with Apache License 2.0 | 5 votes |
/**
 * Returns the two characters at indices 6 and 7 of the first field of
 * {@code input}, read as a String.
 *
 * @param input tuple whose first field is a timestamp string
 * @return the extracted substring; null when the input is null or empty, or
 *         when extraction fails (the error is printed to stderr)
 */
public String exec(Tuple input) throws IOException {
    boolean hasField = input != null && input.size() != 0;
    if (!hasField) {
        return null;
    }

    String hour;
    try {
        String timestamp = (String) input.get(0);
        hour = timestamp.substring(6, 8);
    } catch (Exception e) {
        System.err.println("ExtractHour: failed to proces input; error - " + e.getMessage());
        hour = null;
    }
    return hour;
}
Example 12
Source File: TestUDF.java From spork with Apache License 2.0 | 5 votes |
/**
 * Sums every field of {@code input}, casting each one to Integer.
 *
 * @param input tuple of Integer fields
 * @return the sum of all fields, or 0 when the input is null or empty
 * @throws IOException if a field cannot be read
 */
@Override
public Integer exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return 0;
    }

    int total = 0;
    int position = 0;
    while (position < input.size()) {
        total += (Integer) input.get(position);
        position++;
    }
    return total;
}
Example 13
Source File: WeightedRangePartitioner.java From spork with Apache License 2.0 | 5 votes |
/**
 * Copies the fields of {@code values} into a float array, casting each field
 * to Float.
 *
 * @param values tuple of Float fields
 * @return an array with one entry per field of {@code values}
 * @throws ExecException if a field cannot be read
 */
protected float[] getProbVec(Tuple values) throws ExecException {
    final int fieldCount = values.size();
    float[] probabilities = new float[fieldCount];
    for (int pos = 0; pos < fieldCount; pos++) {
        Float entry = (Float) values.get(pos);
        probabilities[pos] = entry;
    }
    return probabilities;
}
Example 14
Source File: UCFIRST.java From spork with Apache License 2.0 | 5 votes |
/**
 * Upper-cases the first character of the first field of {@code input}.
 *
 * @param input tuple whose first field is read as a String
 * @return the string with its first character upper-cased; null when the
 *         input is null or empty, the field is null, or the field cannot be
 *         read (a warning is logged in that last case); an empty string is
 *         returned unchanged
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    final String str;
    try {
        str = (String) input.get(0);
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }

    if (str == null || str.isEmpty()) {
        return str;
    }
    char head = Character.toUpperCase(str.charAt(0));
    return head + str.substring(1);
}
Example 15
Source File: TestXMLLoader.java From spork with Apache License 2.0 | 5 votes |
public void testXMLLoaderShouldLoadBasicBzip2Files() throws Exception { String filename = TestHelper.createTempFile(data, ""); Process bzipProc = Runtime.getRuntime().exec("bzip2 "+filename); int waitFor = bzipProc.waitFor(); if(waitFor != 0) { fail ("Failed to create the class"); } filename = filename + ".bz2"; try { PigServer pigServer = new PigServer (ExecType.LOCAL); String loadQuery = "A = LOAD '" + Util.encodeEscape(filename) + "' USING org.apache.pig.piggybank.storage.XMLLoader('property') as (doc:chararray);"; pigServer.registerQuery(loadQuery); Iterator<Tuple> it = pigServer.openIterator("A"); int tupleCount = 0; while (it.hasNext()) { Tuple tuple = (Tuple) it.next(); if (tuple == null) break; else { //TestHelper.examineTuple(expected, tuple, tupleCount); if (tuple.size() > 0) { tupleCount++; } } } assertEquals(2, tupleCount); } finally { new File(filename).delete(); } }
Example 16
Source File: TRIM.java From spork with Apache License 2.0 | 5 votes |
/**
 * Trims the first field of {@code input} using {@code String.trim()}.
 *
 * @param input tuple whose first field is read as a String
 * @return the trimmed string; null when the input is null or empty, the field
 *         is null, or the field cannot be read (a warning is logged in that
 *         last case)
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    final String str;
    try {
        str = (String) input.get(0);
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }

    if (str == null || str.isEmpty()) {
        return str;
    }
    return str.trim();
}
Example 17
Source File: AllFirstLetter.java From spork with Apache License 2.0 | 5 votes |
/**
 * Concatenates the first character of the string form of the first field of
 * every tuple in the bag at field 0 of {@code input}.
 *
 * <p>Tuples that are null or empty, and fields that are null or whose string
 * form is empty, contribute nothing. The outcome is also stored in the
 * {@code result} field.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return the concatenated first characters, possibly empty
 * @throws IOException if a field cannot be read
 */
public String exec(Tuple input) throws IOException {
    result = "";
    DataBag bag = (DataBag) input.get(0);

    // Accumulate in a builder to avoid re-copying the string per tuple.
    StringBuilder letters = new StringBuilder();
    for (Tuple t : bag) {
        if (t == null || t.size() == 0) {
            continue;
        }
        Object field = t.get(0);
        if (field == null) {
            continue;
        }
        String text = field.toString();
        // An empty string has no first character; substring(0, 1) would throw.
        if (!text.isEmpty()) {
            letters.append(text.charAt(0));
        }
    }

    result = letters.toString();
    return result;
}
Example 18
Source File: SubtractDuration.java From spork with Apache License 2.0 | 5 votes |
/**
 * Subtracts a period, given as a String in field 1, from the DateTime in
 * field 0.
 *
 * @param input tuple holding a DateTime and a period string
 * @return the DateTime minus the period, or null when the input is null, has
 *         fewer than two fields, or either of the first two fields is null
 * @throws IOException if a field cannot be read
 */
@Override
public DateTime exec(Tuple input) throws IOException {
    if (input == null || input.size() < 2) {
        return null;
    }
    if (input.get(0) == null || input.get(1) == null) {
        return null;
    }

    DateTime start = (DateTime) input.get(0);
    Period span = new Period((String) input.get(1));
    return start.minus(span);
}
Example 19
Source File: PigQueryInterpreter.java From zeppelin with Apache License 2.0 | 4 votes |
@Override public InterpreterResult interpret(String st, InterpreterContext context) { // '-' is invalid for pig alias String alias = "paragraph_" + context.getParagraphId().replace("-", "_"); String[] lines = st.split("\n"); List<String> queries = new ArrayList<>(); for (int i = 0; i < lines.length; ++i) { if (i == lines.length - 1) { lines[i] = alias + " = " + lines[i]; } queries.add(lines[i]); } StringBuilder resultBuilder = new StringBuilder("%table "); try { pigServer.setJobName(createJobName(st, context)); File tmpScriptFile = PigUtils.createTempPigScript(queries); // each thread should its own ScriptState & PigStats ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState()); // reset PigStats, otherwise you may get the PigStats of last job in the same thread // because PigStats is ThreadLocal variable PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats()); PigScriptListener scriptListener = new PigScriptListener(); ScriptState.get().registerListener(scriptListener); listenerMap.put(context.getParagraphId(), scriptListener); pigServer.registerScript(tmpScriptFile.getAbsolutePath()); Schema schema = pigServer.dumpSchema(alias); boolean schemaKnown = (schema != null); if (schemaKnown) { for (int i = 0; i < schema.size(); ++i) { Schema.FieldSchema field = schema.getField(i); resultBuilder.append(field.alias != null ? 
field.alias : "col_" + i); if (i != schema.size() - 1) { resultBuilder.append("\t"); } } resultBuilder.append("\n"); } Iterator<Tuple> iter = pigServer.openIterator(alias); boolean firstRow = true; int index = 0; while (iter.hasNext() && index < maxResult) { index++; Tuple tuple = iter.next(); if (firstRow && !schemaKnown) { for (int i = 0; i < tuple.size(); ++i) { resultBuilder.append("c_" + i + "\t"); } resultBuilder.append("\n"); firstRow = false; } resultBuilder.append(StringUtils.join(tuple.iterator(), "\t")); resultBuilder.append("\n"); } if (index >= maxResult && iter.hasNext()) { resultBuilder.append("\n"); resultBuilder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, MAX_RESULTS)); } } catch (IOException e) { // Extract error in the following order // 1. catch FrontendException, FrontendException happens in the query compilation phase. // 2. catch ParseException for syntax error // 3. PigStats, This is execution error // 4. Other errors. if (e instanceof FrontendException) { FrontendException fe = (FrontendException) e; if (!fe.getMessage().contains("Backend error :")) { LOGGER.error("Fail to run pig query.", e); return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); } } if (e.getCause() instanceof ParseException) { return new InterpreterResult(Code.ERROR, e.getMessage()); } PigStats stats = PigStats.get(); if (stats != null) { String errorMsg = stats.getDisplayString(); if (errorMsg != null) { return new InterpreterResult(Code.ERROR, errorMsg); } } LOGGER.error("Fail to run pig query.", e); return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); } finally { listenerMap.remove(context.getParagraphId()); } return new InterpreterResult(Code.SUCCESS, resultBuilder.toString()); }
Example 20
Source File: BigIntegerAbs.java From spork with Apache License 2.0 | 4 votes |
/**
 * Returns the absolute value of the BigInteger in the first field of
 * {@code input}.
 *
 * @param input tuple whose first field is a BigInteger
 * @return the absolute value, or null when the input is null, empty, or holds
 *         a null first field
 * @throws IOException if the field cannot be read
 */
@Override
public BigInteger exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    Object value = input.get(0);
    if (value == null) {
        return null;
    }
    return ((BigInteger) value).abs();
}