Java Code Examples for org.apache.pig.data.Tuple#append()
The following examples show how to use
org.apache.pig.data.Tuple#append().
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestForEachStar.java From spork with Apache License 2.0 | 6 votes |
@Test public void testForeachStarSchemaUnkown() throws IOException, ParserException{ PigServer pig = new PigServer(ExecType.LOCAL); String query = " l1 = load '" + INPUT_FILE + "' ;" + "f1 = foreach l1 generate * ;" ; Util.registerMultiLineQuery(pig, query); pig.explain("f1",System.out); Iterator<Tuple> it = pig.openIterator("f1"); Tuple expectedResCharArray = (Tuple)Util.getPigConstant("('one','two')"); Tuple expectedRes = TupleFactory.getInstance().newTuple(); for(Object field : expectedResCharArray.getAll() ){ expectedRes.append(new DataByteArray(field.toString())); } assertTrue("has output", it.hasNext()); assertEquals(expectedRes, it.next()); }
Example 2
Source File: TestBinInterSedes.java From spork with Apache License 2.0 | 6 votes |
@Test public void testTupleWriteReadLongDiffSizes() throws IOException { Random r = new Random(100L); Tuple tuple = TupleFactory.getInstance().newTuple(); tuple.append(new Long(0)); tuple.append(new Long(1)); tuple.append(new Long(-1)); tuple.append(new Long(300)); tuple.append(new Long(600)); tuple.append(new Long(10000)); tuple.append(new Long(-10000)); tuple.append(new Long(5000000000000000000L)); tuple.append(new Long(-5000000000000000000L)); for (int i = 0; i < 100000; i++) { tuple.append(new Long(r.nextLong())); } testTupleSedes(tuple); }
Example 3
Source File: TestBinInterSedes.java From spork with Apache License 2.0 | 5 votes |
/** * test sedes of int of diff sizes * @throws IOException */ @Test public void testTupleWriteReadIntDiffSizes() throws IOException { //create a tuple with integer columns of different sizes Tuple tuple = TupleFactory.getInstance().newTuple(); tuple.append(new Integer(0)); //boolean rep tuple.append(new Integer(1)); //boolean rep tuple.append(new Integer(125)); //fits into byte tuple.append(new Integer(1024)); //fits into short tuple.append(new Integer(1024*1024*1024)); //fits into int (=~ 2 ^30) testTupleSedes(tuple); }
Example 4
Source File: AppendIndex.java From spork with Apache License 2.0 | 5 votes |
@Override public void accumulate(Tuple input) throws IOException { if (interBag == null) { interBag = mBagFactory.newDefaultBag(); ct = 0; } for (Tuple t : (DataBag)input.get(0)) { Tuple t2 = mTupleFactory.newTupleNoCopy(t.getAll()); t2.append(++ct); interBag.add(t2); } }
Example 5
Source File: TestFilterUDF.java From spork with Apache License 2.0 | 5 votes |
@Test public void testFilterUDFusingDefine() throws Exception { File inputFile = createFile( new String[] { "www.paulisageek.com\t4", "www.yahoo.com\t12344", "google.com\t1", "us2.amazon.com\t4141" } ); File filterFile = createFile( new String[] { "12344" } ); pigServer.registerQuery("define FILTER_CRITERION " + FILTERFROMFILE.class.getName() + "('" + Util.generateURI(filterFile.toString(), pigServer .getPigContext()) + "');"); pigServer.registerQuery("a = LOAD '" + Util.generateURI(inputFile.toString(), pigServer .getPigContext()) + "' as (url:chararray, numvisits:int);"); pigServer.registerQuery("b = filter a by FILTER_CRITERION(numvisits);"); Tuple expectedTuple = tf.newTuple(); expectedTuple.append(new String("www.yahoo.com")); expectedTuple.append(new Integer("12344")); Iterator<Tuple> iter = pigServer.openIterator("b"); while (iter.hasNext()) { Tuple t = iter.next(); assertTrue(t.equals(expectedTuple)); } }
Example 6
Source File: TestLocalRearrange.java From spork with Apache License 2.0 | 5 votes |
private void setUp1() throws PlanException, ExecException{ lr = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0,0,db.iterator().next()); POProject proj = GenPhyOp.exprProject(); proj.setColumn(0); proj.setResultType(DataType.TUPLE); proj.setOverloaded(true); Tuple t = new DefaultTuple(); t.append(db); proj.attachInput(t); List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>(); inputs.add(proj); lr.setInputs(inputs); }
Example 7
Source File: TestBinInterSedes.java From spork with Apache License 2.0 | 5 votes |
private Tuple createTupleWithManyCols(int size) { Tuple t = TupleFactory.getInstance().newTuple(size); Integer col = Integer.valueOf(1); for(int i=0; i<size; i++){ t.append(col); } return t; }
Example 8
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
public static DataBag genFloatDataBag(Random r, int column, int row) { DataBag db = DefaultBagFactory.getInstance().newDefaultBag(); for (int i=0;i<row;i++) { Tuple t = TupleFactory.getInstance().newTuple(); for (int j=0;j<column;j++) { t.append(r.nextFloat()*1000); } db.add(t); } return db; }
Example 9
Source File: TestTuple.java From spork with Apache License 2.0 | 5 votes |
@Test public void testTupleSizeWithBooleans() { Tuple t = mTupleFactory.newTuple(); t.append(new Boolean(true)); t.append(new Boolean(false)); long size = t.getMemorySize(); assertEquals("tuple size", 120, size); }
Example 10
Source File: TestHelper.java From spork with Apache License 2.0 | 5 votes |
private static Tuple trimTuple(Tuple t){ Tuple ret = TupleFactory.getInstance().newTuple(); for (Object o : t.getAll()) { DataByteArray dba = (DataByteArray)o; DataByteArray nDba = new DataByteArray(dba.toString().trim().getBytes()); ret.append(nDba); } return ret; }
Example 11
Source File: TestToBagToTuple.java From spork with Apache License 2.0 | 5 votes |
@Test public void toTuple() throws Exception{ ToTuple tb = new ToTuple(); Tuple input = TupleFactory.getInstance().newTuple(); for (int i = 0; i < 100; ++i) { input.append(i); } Tuple output = tb.exec(input); Assert.assertFalse(input == output); Assert.assertEquals(input, output); }
Example 12
Source File: TestBinInterSedes.java From spork with Apache License 2.0 | 5 votes |
/** * create bag having given number of tuples * @param size * @return */ private DataBag createBag(int size) { Tuple innerTuple = TupleFactory.getInstance().newTuple(); innerTuple.append(Integer.valueOf(1)); DataBag bag = BagFactory.getInstance().newDefaultBag(); for(int i=0; i<size; i++){ bag.add(innerTuple); } return bag; }
Example 13
Source File: DumpStreamer.java From spork with Apache License 2.0 | 5 votes |
@Override public Tuple deserialize(byte[] bytes) throws IOException { String line = new String(bytes, utf8); Tuple t = DefaultTupleFactory.getInstance().newTuple(); String tmp = line.substring(1, line.length() - 2); String[] fields = tmp.split(","); int i; for (i = 0; i < fields.length; i++) t.append(fields[i].trim()); return t; }
Example 14
Source File: TestPODistinct.java From spork with Apache License 2.0 | 5 votes |
@Test public void testPODistictWithNullValues() throws ExecException { input = BagFactory.getInstance().newDefaultBag(); TupleFactory tf = TupleFactory.getInstance(); for (int i = 0; i < MAX_SAMPLES; i++) { Tuple t = tf.newTuple(); t.append(null); input.add(t); // System.out.println(t); } confirmDistinct(); }
Example 15
Source File: TestDataModel.java From spork with Apache License 2.0 | 4 votes |
@Test public void testMultiFieldTupleCompareTo() throws Exception { TupleFactory tf = TupleFactory.getInstance(); Tuple t1 = tf.newTuple(); Tuple t2 = tf.newTuple(); t1.append(new DataByteArray("bbb")); t1.append(new DataByteArray("bbb")); t2.append(new DataByteArray("bbb")); t2.append(new DataByteArray("bbb")); assertEquals("same data equal", 0, t1.compareTo(t2)); t2 = tf.newTuple(); t2.append(new DataByteArray("aaa")); t2.append(new DataByteArray("aaa")); assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2)); t2 = tf.newTuple(); t2.append(new DataByteArray("ddd")); t2.append(new DataByteArray("ddd")); assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2)); // First column same, second lesser t2 = tf.newTuple(); t2.append(new DataByteArray("bbb")); t2.append(new DataByteArray("aaa")); assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2)); // First column same, second greater t2 = tf.newTuple(); t2.append(new DataByteArray("bbb")); t2.append(new DataByteArray("ccc")); assertTrue("greater than tuple with lesser value", 0 > t1.compareTo(t2)); // First column less, second same t2 = tf.newTuple(); t2.append(new DataByteArray("aaa")); t2.append(new DataByteArray("bbb")); assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2)); // First column greater, second same t2 = tf.newTuple(); t2.append(new DataByteArray("ccc")); t2.append(new DataByteArray("bbb")); assertTrue("greater than tuple with lesser value", 0 > t1.compareTo(t2)); // First column less, second greater t2 = tf.newTuple(); t2.append(new DataByteArray("aaa")); t2.append(new DataByteArray("ccc")); assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2)); // First column greater, second same t2 = tf.newTuple(); t2.append(new DataByteArray("ccc")); t2.append(new DataByteArray("aaa")); assertTrue("greater than tuple with lesser value", 0 > t1.compareTo(t2)); }
Example 16
Source File: PhoenixHBaseStorerIT.java From phoenix with Apache License 2.0 | 4 votes |
/** * Basic test - writes data to a Phoenix table and compares the data written * to expected * * @throws Exception */ @Test public void testStorer() throws Exception { final String tableName = "TABLE1"; final Statement stmt = conn.createStatement(); stmt.execute("CREATE TABLE " + tableName + " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR)"); final Data data = Storage.resetData(pigServer); final Collection<Tuple> list = Lists.newArrayList(); // Create input dataset int rows = 100; for (int i = 0; i < rows; i++) { Tuple t = tupleFactory.newTuple(); t.append(i); t.append("a" + i); list.add(t); } data.set("in", "id:int, name:chararray", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.registerQuery("Store A into 'hbase://" + tableName + "' using " + PhoenixHBaseStorage.class.getName() + "('" + zkQuorum + "', '-batchSize 1000');"); // Now run the Pig script if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", pigServer.executeBatch() .get(0).getException()); } // Compare data in Phoenix table to the expected final ResultSet rs = stmt .executeQuery("SELECT id, name FROM table1 ORDER BY id"); for (int i = 0; i < rows; i++) { assertTrue(rs.next()); assertEquals(i, rs.getInt(1)); assertEquals("a" + i, rs.getString(2)); } }
Example 17
Source File: TestStore.java From spork with Apache License 2.0 | 4 votes |
@Test
public void testStoreComplexDataWithNull() throws Exception {
    // Store a randomly generated tuple containing nulls in every complex
    // type, read the text output back with PigStorage casters, and verify
    // the round trip reproduces the original tuple.
    Tuple inputTuple = GenRandomData.genRandSmallBagTextTupleWithNulls(new Random(), 10, 100);
    inpDB = DefaultBagFactory.getInstance().newDefaultBag();
    inpDB.add(inputTuple);
    storeAndCopyLocally(inpDB);
    PigStorage ps = new PigStorage("\t");
    BufferedReader br = new BufferedReader(new FileReader(outputFileName));
    for(String line=br.readLine();line!=null;line=br.readLine()){
        System.err.println("Complex data: ");
        System.err.println(line);
        // split limit -1 keeps trailing empty fields so null columns stay addressable
        String[] flds = line.split("\t",-1);
        Tuple t = new DefaultTuple();

        // Field schemas needed to cast the complex columns back from text.
        ResourceFieldSchema stringfs = new ResourceFieldSchema();
        stringfs.setType(DataType.CHARARRAY);
        ResourceFieldSchema intfs = new ResourceFieldSchema();
        intfs.setType(DataType.INTEGER);
        ResourceFieldSchema bytefs = new ResourceFieldSchema();
        bytefs.setType(DataType.BYTEARRAY);

        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);

        ResourceSchema bagSchema = new ResourceSchema();
        bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
        ResourceFieldSchema bagfs = new ResourceFieldSchema();
        bagfs.setSchema(bagSchema);
        bagfs.setType(DataType.BAG);

        ResourceSchema mapSchema = new ResourceSchema();
        mapSchema.setFields(new ResourceFieldSchema[]{bytefs});
        ResourceFieldSchema mapfs = new ResourceFieldSchema();
        mapfs.setSchema(mapSchema);
        mapfs.setType(DataType.MAP);

        // An empty string marks a null field in the stored output.
        t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
        t.append(flds[1].compareTo("")!=0 ? new DataByteArray(flds[1].getBytes()) : null);
        t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
        t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
        t.append(flds[4].compareTo("")!=0 ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
        t.append(flds[5].compareTo("")!=0 ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
        t.append(flds[6].compareTo("")!=0 ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
        t.append(flds[7].compareTo("")!=0 ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
        t.append(flds[8].compareTo("")!=0 ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
        t.append(flds[9].compareTo("")!=0 ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
        t.append(flds[10].compareTo("")!=0 ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
        // BUG FIX: the last column tested flds[11] for null but converted
        // flds[10], re-reading the datetime text instead of the final
        // chararray field.
        t.append(flds[11].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[11].getBytes()) : null);
        assertEquals(inputTuple, t);
    }
    br.close();
}
Example 18
Source File: PhoenixHBaseStorerIT.java From phoenix with Apache License 2.0 | 4 votes |
/**
 * Test storage of DataByteArray columns to Phoenix.
 * Maps the DataByteArray with the target PhoenixDataType and persists in HBase.
 * @throws Exception
 */
@Test
public void testStoreWithBinaryDataTypes() throws Exception {
    final String tableName = "TABLE3";
    final Statement stmt = conn.createStatement();
    stmt.execute("CREATE TABLE " + tableName
            + " (col1 BIGINT NOT NULL, col2 INTEGER , col3 FLOAT, col4 DOUBLE , col5 TINYINT , "
            + " col6 BOOLEAN , col7 VARBINARY CONSTRAINT my_pk PRIMARY KEY (col1))");
    final Data data = Storage.resetData(pigServer);
    final Collection<Tuple> list = Lists.newArrayList();

    // Each row carries raw byte encodings of the target Phoenix types.
    int rows = 10;
    for (int i = 1; i <= rows; i++) {
        Tuple t = tupleFactory.newTuple();
        t.append(i);
        t.append(new DataByteArray(Bytes.toBytes(i * 5)));
        t.append(new DataByteArray(Bytes.toBytes(i * 10.0F)));
        t.append(new DataByteArray(Bytes.toBytes(i * 15.0D)));
        t.append(new DataByteArray(Bytes.toBytes(i)));
        t.append(new DataByteArray(Bytes.toBytes( i % 2 == 0)));
        t.append(new DataByteArray(Bytes.toBytes(i)));
        list.add(t);
    }
    data.set("in", "col1:int,col2:bytearray,col3:bytearray,col4:bytearray,col5:bytearray,col6:bytearray,col7:bytearray ", list);

    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.registerQuery("Store A into 'hbase://" + tableName + "' using "
            + PhoenixHBaseStorage.class.getName() + "('" + zkQuorum
            + "', '-batchSize 1000');");

    // FIX: execute the batch once and reuse the result; the original
    // re-invoked executeBatch() inside the failure branch, re-running the job.
    final ExecJob job = pigServer.executeBatch().get(0);
    if (job.getStatus() != JOB_STATUS.COMPLETED) {
        throw new RuntimeException("Job failed", job.getException());
    }

    final ResultSet rs = stmt.executeQuery(
            String.format("SELECT col1 , col2 , col3 , col4 , col5 , col6, col7 FROM %s ORDER BY col1" , tableName));
    int count = 0;
    for (int i = 1; i <= rows; i++) {
        assertTrue(rs.next());
        assertEquals(i, rs.getInt(1));
        assertEquals(i * 5, rs.getInt(2));
        assertEquals(i * 10.0F, rs.getFloat(3), 0.0);
        // BUG FIX: col4 is a DOUBLE — read it with getDouble, not getInt
        // (the original truncated the value before comparing).
        assertEquals(i * 15.0D, rs.getDouble(4), 0.0);
        assertEquals(i, rs.getInt(5));
        assertEquals(i % 2 == 0, rs.getBoolean(6));
        assertArrayEquals(Bytes.toBytes(i), rs.getBytes(7));
        count++;
    }
    assertEquals(rows, count);
}
Example 19
Source File: PhoenixHBaseStorerIT.java From phoenix with Apache License 2.0 | 4 votes |
@Test public void testStoreWithDateTime() throws Exception { final String tableName = "TABLE4"; final Statement stmt = conn.createStatement(); stmt.execute("CREATE TABLE " + tableName + " (col1 BIGINT NOT NULL, col2 DATE , col3 TIME, " + " col4 TIMESTAMP CONSTRAINT my_pk PRIMARY KEY (col1))"); long now = System.currentTimeMillis(); final DateTime dt = new DateTime(now); final Data data = Storage.resetData(pigServer); final Collection<Tuple> list = Lists.newArrayList(); Tuple t = tupleFactory.newTuple(); t.append(1); t.append(dt); t.append(dt); t.append(dt); list.add(t); data.set("in", "col1:int,col2:datetime,col3:datetime,col4:datetime", list); pigServer.setBatchOn(); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.registerQuery("Store A into 'hbase://" + tableName + "' using " + PhoenixHBaseStorage.class.getName() + "('" + zkQuorum + "', '-batchSize 1000');"); if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { throw new RuntimeException("Job failed", pigServer.executeBatch() .get(0).getException()); } final ResultSet rs = stmt .executeQuery(String.format("SELECT col1 , col2 , col3 , col4 FROM %s " , tableName)); assertTrue(rs.next()); assertEquals(1, rs.getInt(1)); assertEquals(now, rs.getDate(2).getTime()); assertEquals(now, rs.getTime(3).getTime()); assertEquals(now, rs.getTimestamp(4).getTime()); }
Example 20
Source File: TestPOUserFunc.java From spork with Apache License 2.0 | 4 votes |
/**
 * Drives the AVG UDF through its algebraic lifecycle (Initial ->
 * Intermediate -> Final) via POUserFunc, asserting the (sum, count) pair
 * after the Initial and Intermediate stages and the final average.
 *
 * @param input                 values loaded into a nested tuple and averaged
 * @param initialExpectedSum    expected sum after the Initial stage
 * @param initialExpectedCount  expected count after the Initial stage
 * @param intermedExpectedSum   expected sum after the Intermediate stage
 * @param intermedExpectedCount expected count after the Intermediate stage
 * @param expectedAvg           expected final average
 */
public void algebraicAVG( Integer[] input , Double initialExpectedSum, Long initialExpectedCount , Double intermedExpectedSum, Long intermedExpectedCount , Double expectedAvg ) throws IOException, ExecException {
    // generate data
    byte INIT = 0;
    byte INTERMED = 1;
    byte FINAL = 2;
    Tuple tup1 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
    Tuple tup2 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
    // System.out.println("Input = " + tup1);
    String funcSpec = AVG.class.getName() + "()";
    POUserFunc po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec));

    //************ Initial Calculations ******************
    TupleFactory tf = TupleFactory.getInstance();
    po.setAlgebraicFunction(INIT);
    po.attachInput(tup1);
    Tuple t = null;
    Result res = po.getNextTuple();
    Tuple outputInitial1 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result : null;
    // NOTE(review): outputInitial2 is taken from the SAME Result as
    // outputInitial1 — tup2 is constructed above but never attached, so the
    // assertEquals below compares an object with itself.  Presumably the
    // intent was to attach tup2 and call getNextTuple() again; confirm
    // before changing.
    Tuple outputInitial2 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result : null;
    System.out.println(outputInitial1 + " " + outputInitial2);
    assertEquals(outputInitial1, outputInitial2);
    Double sum = (Double) outputInitial1.get(0);
    Long count = (Long) outputInitial1.get(1);
    assertEquals(initialExpectedSum, sum);
    assertEquals(initialExpectedCount, count);

    //************ Intermediate Data and Calculations ******************
    // Bag the two Initial outputs into a one-column tuple, as the
    // Intermediate stage expects.
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    bag.add(outputInitial1);
    bag.add(outputInitial2);
    Tuple outputInitial = tf.newTuple();
    outputInitial.append(bag);
    // Tuple outputIntermed = intermed.exec(outputInitial);
    po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec));
    po.setAlgebraicFunction(INTERMED);
    po.attachInput(outputInitial);
    res = po.getNextTuple();
    Tuple outputIntermed = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result : null;
    sum = (Double) outputIntermed.get(0);
    count = (Long) outputIntermed.get(1);
    assertEquals(intermedExpectedSum, sum);
    assertEquals(intermedExpectedCount, count);
    System.out.println(outputIntermed);

    //************ Final Calculations ******************
    po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcSpec));
    po.setAlgebraicFunction(FINAL);
    // NOTE(review): the FINAL stage re-attaches outputInitial (the bag of
    // Initial-stage outputs) rather than the Intermediate output; this
    // appears to work because both stages emit (sum, count) tuples — verify
    // it matches the intended data flow.
    po.attachInput(outputInitial);
    res = po.getNextTuple();
    Double output = (res.returnStatus == POStatus.STATUS_OK) ? (Double) res.result : null;
    // Double output = fin.exec(outputInitial);
    assertEquals((Double)expectedAvg, output);
    // System.out.println("output = " + output);
}