Java Code Examples for org.apache.pig.data.BagFactory#newDefaultBag()

The following examples show how to use org.apache.pig.data.BagFactory#newDefaultBag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Calls BagToString with only the bag argument (no explicit delimiter) and
 * expects the fields of both tuples to be joined with "_".
 */
@Test
public void testUseDefaultDelimiterBagToStringUDF() throws Exception {
	TupleFactory tuples = TupleFactory.getInstance();
	BagFactory bags = BagFactory.getInstance();

	// First row: ("a", 5)
	Tuple first = tuples.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	// Second row: ("c", 6)
	Tuple second = tuples.newTuple(2);
	second.set(0, "c");
	second.set(1, 6);

	DataBag rows = bags.newDefaultBag();
	rows.add(first);
	rows.add(second);

	// The UDF input carries a single field: the bag itself.
	Tuple args = tuples.newTuple(1);
	args.set(0, rows);

	BagToString bagToString = new BagToString();
	String joined = bagToString.exec(args);
	assertEquals("a_5_c_6", joined);
}
 
Example 2
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Calls BagToString with a bag and an explicit "-" delimiter and expects
 * the fields of both tuples to be joined with that delimiter.
 */
@Test
public void testBasicBagToStringUDF() throws Exception {
	TupleFactory tuples = TupleFactory.getInstance();
	BagFactory bags = BagFactory.getInstance();

	// First row: ("a", 5)
	Tuple first = tuples.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	// Second row: ("c", 6)
	Tuple second = tuples.newTuple(2);
	second.set(0, "c");
	second.set(1, 6);

	DataBag rows = bags.newDefaultBag();
	rows.add(first);
	rows.add(second);

	// UDF input: field 0 is the bag, field 1 is the delimiter.
	Tuple args = tuples.newTuple(2);
	args.set(0, rows);
	args.set(1, "-");

	BagToString bagToString = new BagToString();
	String joined = bagToString.exec(args);
	assertEquals("a-5-c-6", joined);
}
 
Example 3
Source File: PigRelSqlUdfs.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Implementation for PIG_BAG functions. Builds a Pig DataBag from
 * the corresponding input.
 *
 * <p>The first element supplies the rows of the bag: if it is a
 * {@link List}, each entry becomes a single-field tuple in the bag;
 * otherwise the element itself becomes the only row. Any remaining
 * elements are appended, unchanged, after the bag in the returned tuple.
 *
 * <p>A {@code null} or empty {@code elements} array yields a tuple whose
 * only field is an empty bag.
 *
 * @param elements Input that contains a bag as its first item, optionally
 *                 followed by extra values to carry through
 * @return Pig Tuple whose first field is the constructed DataBag
 */
public static Tuple buildBag(Object... elements) {
  final TupleFactory tupleFactory = TupleFactory.getInstance();
  final BagFactory bagFactory = BagFactory.getInstance();
  // A zero-argument varargs call passes an empty (non-null) array, so the
  // length must be checked before touching elements[0].
  final boolean hasInput = elements != null && elements.length > 0;

  // Convert each row into a Tuple
  final List<Tuple> tupleList = new ArrayList<>();
  if (hasInput) {
    // The first input contains a list of rows for the bag
    final List<?> bag = (elements[0] instanceof List)
        ? (List<?>) elements[0]
        : Collections.singletonList(elements[0]);
    for (Object row : bag) {
      tupleList.add(tupleFactory.newTuple(Arrays.asList(row)));
    }
  }

  // Then build a bag from the tuple list
  final DataBag resultBag = bagFactory.newDefaultBag(tupleList);

  // The returned result is a new Tuple with the newly constructed DataBag
  // as the first item.
  final List<Object> finalTuple = new ArrayList<>();
  finalTuple.add(resultBag);

  if (hasInput) {
    // Add the remaining elements from the input
    for (int i = 1; i < elements.length; i++) {
      finalTuple.add(elements[i]);
    }
  }

  return tupleFactory.newTuple(finalTuple);
}
 
Example 4
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a fixed map holding one value of each kind (string, int, long,
 * float, double, byte array, nested map, tuple and bag). The {@code input}
 * tuple is not read.
 *
 * @param input ignored
 * @return a newly built map with the keys "string", "int", "long",
 *         "float", "double", "dba", "map", "tuple" and "bag"
 * @throws IOException declared by the overridden method; not thrown by
 *         any statement visible here
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    TupleFactory tupleFactory = TupleFactory.getInstance();
    // Boxed values come from valueOf(...) / literals rather than the
    // deprecated wrapper constructors; the resulting values are equal.
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    // Single-row bag wrapping the tuple built above.
    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    // NOTE(review): getBytes() uses the platform default charset, as the
    // original did; the literal is plain ASCII.
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example 5
Source File: TestBloom.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bloom filter from a bag holding the single key 1 using
 * BuildBloom.Initial, loads it into a Bloom UDF, and checks that the
 * inserted key is accepted while a handful of other keys are rejected.
 */
@Test
public void testMap() throws Exception {
    String size = "100";
    String numHash = "3";
    String hashFunc = "JENKINS";
    TupleFactory tf = TupleFactory.getInstance();
    BagFactory bf = BagFactory.getInstance();

    // Input to the map phase: a tuple wrapping a bag with one row (1).
    Tuple t = tf.newTuple(1);
    t.set(0, 1);
    DataBag b = bf.newDefaultBag();
    b.add(t);
    Tuple input = tf.newTuple(b);

    BuildBloom.Initial map =
            new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
    t = map.exec(input);

    // Field 0 of the map output is handed to the Bloom UDF as its filter.
    Bloom bloom = new Bloom("bla");
    bloom.setFilter((DataByteArray)t.get(0));

    // Test that everything we put in passes.
    Tuple t1 = tf.newTuple(1);
    t1.set(0, 1);
    assertTrue(bloom.exec(t1));

    // A few that don't pass. The original bound (i < 10) was already false
    // for i = 100, so the loop body never ran; check keys 100..109 instead.
    // NOTE(review): a bloom filter may report false positives; these keys
    // are assumed to be rejected by the filter built above - confirm.
    for (int i = 100; i < 110; i++) {
        Tuple t2 = tf.newTuple(1);
        t2.set(0, i);
        assertFalse(bloom.exec(t2));
    }
}
 
Example 6
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Calls BagToString on a bag whose second row contains a nested tuple and
 * expects the nested tuple to appear in its parenthesised form, "(d,7)",
 * while the top-level fields are joined with "_".
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
	TupleFactory tuples = TupleFactory.getInstance();
	BagFactory bags = BagFactory.getInstance();

	// Inner tuple that will be embedded in the second row: ("d", 7)
	Tuple inner = tuples.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);

	// First row: ("a", 5)
	Tuple first = tuples.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	// Second row: ("c", 6, ("d", 7))
	Tuple second = tuples.newTuple(3);
	second.set(0, "c");
	second.set(1, 6);
	second.set(2, inner);

	DataBag rows = bags.newDefaultBag();
	rows.add(first);
	rows.add(second);

	// UDF input: field 0 is the bag, field 1 is the delimiter.
	Tuple args = tuples.newTuple(2);
	args.set(0, rows);
	args.set(1, "_");

	BagToString bagToString = new BagToString();
	String joined = bagToString.exec(args);
	assertEquals("a_5_c_6_(d,7)", joined);
}
 
Example 7
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a fixed map holding one value of each kind (string, int, long,
 * float, double, byte array, nested map, tuple and bag). The {@code input}
 * tuple is not read.
 *
 * @param input ignored
 * @return a newly built map with the keys "string", "int", "long",
 *         "float", "double", "dba", "map", "tuple" and "bag"
 * @throws IOException declared by the overridden method; not thrown by
 *         any statement visible here
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    TupleFactory tupleFactory = TupleFactory.getInstance();
    // Boxed values come from valueOf(...) / literals rather than the
    // deprecated wrapper constructors; the resulting values are equal.
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    // Single-row bag wrapping the tuple built above.
    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    // NOTE(review): getBytes() uses the platform default charset, as the
    // original did; the literal is plain ASCII.
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example 8
Source File: LogicalPlanBuilder.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new bag obtained from {@code BagFactory.getInstance()} via
 * its no-argument {@code newDefaultBag()}.
 */
static DataBag createDataBag() {
    return BagFactory.getInstance().newDefaultBag();
}