Java Code Examples for org.apache.pig.pigunit.PigTest#runScript()
The following examples show how to use
org.apache.pig.pigunit.PigTest#runScript().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ChaoShenEntropyTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Runs the {@code entropy} script with the parameters {@code type=chaosh} and
 * {@code base=log} against an input file holding a single value, and checks
 * that alias {@code data_out} matches an expected entropy of 0.
 */
@Test
public void singleElemInputBagChaoShenEntropoyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log");

    writeLinesToFile("input", "98.94791");

    pigTest.runScript();

    // Reference value computed in R:
    //   > count=c(1)
    //   > library(entropy)
    //   > entropy(count,count/sum(count),c("CS"),c("log"))
    //   [1] 0
    List<Double> expected = new ArrayList<Double>();
    expected.add(0.0);

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");

    // The trailing 5 is passed through unchanged; presumably a precision in
    // decimal digits -- confirm against verifyEqualEntropyOutput.
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 2
Source File: QuantileTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Feeds the integers 100000 down to 0 (one per line) to the {@code quantileTest}
 * script configured with seven quantiles, and expects a single output tuple in
 * alias {@code data_out} holding the corresponding values.
 */
@Test
public void quantile3Test() throws Exception {
    PigTest pigTest = createPigTestFromString(
        quantileTest,
        "QUANTILES='0.0013','0.0228','0.1587','0.5','0.8413','0.9772','0.9987'");

    // Input is written in descending order: 100000, 99999, ..., 0.
    List<String> lines = new ArrayList<String>();
    int value = 100000;
    while (value >= 0) {
        lines.add(String.valueOf(value));
        value--;
    }
    String[] lineArray = lines.toArray(new String[lines.size()]);
    writeLinesToFile("input", lineArray);

    pigTest.runScript();

    List<Tuple> rows = getLinesForAlias(pigTest, "data_out", true);

    assertEquals(rows.size(), 1);
    String onlyRow = rows.get(0).toString();
    assertEquals(onlyRow, "(130.0,2280.0,15870.0,50000.0,84130.0,97720.0,99870.0)");
}
Example 3
Source File: MacroTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Writes 31 tab-separated key/value lines, runs the {@code countTest} script and
 * expects alias {@code cnt} to contain the single tuple {@code (31)}.
 */
@Test
public void countTest() throws Exception {
    PigTest pigTest = createPigTestFromString(countTest);

    // Lines are grouped by key (A1..A10) purely for readability.
    writeLinesToFile("input",
        "A1\t1", "A1\t4", "A1\t4", "A1\t4",
        "A2\t4", "A2\t4",
        "A3\t3", "A3\t1", "A3\t77",
        "A4\t3", "A4\t3", "A4\t59", "A4\t29",
        "A5\t4",
        "A6\t3", "A6\t55", "A6\t1",
        "A7\t39", "A7\t27", "A7\t85",
        "A8\t4", "A8\t45",
        "A9\t92", "A9\t42", "A9\t1", "A9\t0",
        "A10\t7", "A10\t23", "A10\t1", "A10\t41", "A10\t52");

    pigTest.runScript();

    assertOutput(pigTest, "cnt", "(31)");
}
Example 4
Source File: MarkovPairTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Runs the {@code markovPairDefault} script over one bag of six single-field
 * tuples and expects alias {@code data_out} to hold one bag pairing each tuple
 * with its successor.
 */
@Test
public void markovPairDefaultTest() throws Exception {
    PigTest pigTest = createPigTestFromString(
        markovPairDefault,
        "schema=(data: bag {t: tuple(val:int)})");

    writeLinesToFile("input", "{(10),(20),(30),(40),(50),(60)}");

    String[] expectedRows = new String[] {
        "({((10),(20)),((20),(30)),((30),(40)),((40),(50)),((50),(60))})"
    };

    pigTest.runScript();

    Iterator<Tuple> actualRows = pigTest.getAlias("data_out");
    assertTuplesMatch(expectedRows, actualRows);
}
Example 5
Source File: BagTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Runs the {@code bagSplitTest} script with {@code MAX=5} over one bag of twelve
 * tuples and expects alias {@code data3} to contain three bags of sizes 5, 5
 * and 2, in input order.
 */
@Test
public void bagSplitTest() throws Exception {
    PigTest pigTest = createPigTestFromString(bagSplitTest, "MAX=5");

    writeLinesToFile(
        "input",
        "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");

    pigTest.runScript();

    assertOutput(
        pigTest,
        "data3",
        "({(1,11),(2,22),(3,33),(4,44),(5,55)})",
        "({(6,66),(7,77),(8,88),(9,99),(10,1010)})",
        "({(11,1111),(12,1212)})");
}
Example 6
Source File: CondEntropyTests.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Negative test: runs the {@code noOrderCondEntropy} script and requires that
 * either running it or reading alias {@code data_out} throws an exception.
 *
 * @throws Exception if test setup (script creation or input writing) fails
 */
@Test
public void noOrderEmpiricalCondEntropoyTest() throws Exception {
    PigTest test = createPigTestFromString(noOrderCondEntropy);

    writeLinesToFile("input",
        "98.94791 click",
        "38.61010 view",
        "97.10575 view",
        "62.28313 click",
        "38.83960 click",
        "32.05370 view",
        "96.10962 view",
        "28.72388 click",
        "96.65888 view",
        "20.41135 click");

    try {
        test.runScript();
        // Called only to force evaluation of the alias; the rows themselves are
        // irrelevant because the call is expected to throw.
        this.getLinesForAlias(test, "data_out");
        // fail() signals via an AssertionError, which is an Error and therefore
        // passes through the catch (Exception) below to fail the test.
        fail( "Testcase should fail");
    } catch (Exception expected) {
        // Intentionally ignored: an exception here is the success condition.
    }
}
Example 7
Source File: CondEntropyTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code condEntropy} script over ten value/label lines that contain
 * repeated values and checks alias {@code data_out} against a conditional
 * entropy of 0.3295837 computed in R.
 */
@Test
public void dupValEmpiricalCondEntropoyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(condEntropy);

    writeLinesToFile("input",
        "98.94791 click",
        "38.61010 click",
        "97.10575 view",
        "62.28313 view",
        "38.61010 view",
        "32.05370 view",
        "96.10962 click",
        "38.61010 click",
        "96.10962 view",
        "20.41135 click");

    pigTest.runScript();

    // Reference value computed in R:
    //   library(infotheo)
    //   X=c("98.94791","38.61010","97.10575","62.28313","38.61010","32.05370","96.10962","38.61010","96.10962","20.41135")
    //   Y=c("click","click","view","view","view","view","click","click","view","click")
    //   condentropy(Y,X)
    //   [1] 0.3295837
    List<Double> expected = new ArrayList<Double>();
    expected.add(0.3295837);

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 8
Source File: EntropyTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code entropy} script over ten distinct values and checks alias
 * {@code data_out} against an entropy of 2.302585 computed in R.
 */
@Test
public void uniqValEmpiricalEntropoyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(entropy);

    writeLinesToFile("input",
        "98.94791",
        "38.61010",
        "97.10575",
        "62.28313",
        "38.83960",
        "32.05370",
        "96.10962",
        "28.72388",
        "96.65888",
        "20.41135");

    pigTest.runScript();

    // Reference value computed in R. Every input value occurs exactly once:
    //   > v=c(98.94791,38.61010,97.10575,62.28313,38.83960,32.05370,96.10962,28.72388,96.65888,20.41135)
    //   > table(v)        # ten distinct values, each with count 1
    //   > count=c(1,1,1,1,1,1,1,1,1,1)
    //   > library(entropy)
    //   > entropy(count)
    //   [1] 2.302585
    List<Double> expected = new ArrayList<Double>();
    expected.add(2.302585);

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 9
Source File: EmpiricalCountEntropyTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code entropy} script on an input of ten values with no repeats
 * and verifies that alias {@code data_out} matches the R-computed entropy
 * 2.302585.
 */
@Test
public void uniqValEntropyTest() throws Exception {
    PigTest script = createPigTestFromString(entropy);

    writeLinesToFile(
        "input",
        "98.94791", "38.61010", "97.10575", "62.28313", "38.83960",
        "32.05370", "96.10962", "28.72388", "96.65888", "20.41135");

    script.runScript();

    /*
     * Expected value, computed using R:
     *
     * > v=c(98.94791,38.61010,97.10575,62.28313,38.83960,32.05370,96.10962,28.72388,96.65888,20.41135)
     * > table(v)          (each of the ten values has count 1)
     * > count=c(1,1,1,1,1,1,1,1,1,1)
     * > library(entropy)
     * > entropy(count)
     * [1] 2.302585
     */
    List<Double> wanted = new ArrayList<Double>();
    wanted.add(2.302585);

    List<Tuple> produced = this.getLinesForAlias(script, "data_out");
    verifyEqualEntropyOutput(wanted, produced, 5);
}
Example 10
Source File: BagTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code distinctByMultiComplexFieldTest} script over one bag of three
 * tuples (each with a chararray, a map and a nested bag) and expects alias
 * {@code data2} to keep the first and third tuples only.
 */
@Test
public void distinctByMultiComplexFieldTest() throws Exception {
    PigTest pigTest = createPigTestFromString(distinctByMultiComplexFieldTest);

    writeLinesToFile(
        "input",
        "({(a-b,[b#1],{(a-b,0),(a-b,1)}),(a-c,[b#1],{(a-b,0),(a-b,1)}),(a-d,[b#0],{(a-b,1),(a-b,2)})})");

    pigTest.runScript();

    assertOutput(
        pigTest,
        "data2",
        "({(a-b,[b#1],{(a-b,0),(a-b,1)}),(a-d,[b#0],{(a-b,1),(a-b,2)})})");
}
Example 11
Source File: WeightedReservoirSamplingTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Negative test: runs the {@code invalidWeightFieldSchemaTest} script and
 * requires that running it (or reading alias {@code sampled}) throws an
 * exception whose message reports the chararray weight field.
 *
 * @throws Exception if test setup (script creation or input writing) fails
 */
@Test
public void invalidWeightFieldSchemaTest() throws Exception {
    PigTest test = createPigTestFromString(invalidWeightFieldSchemaTest);

    writeLinesToFile("input", "a\t100", "b\t1", "c\t5", "d\t2");

    try {
        test.runScript();
        // Called only to force evaluation of the alias; the call is expected to throw.
        this.getLinesForAlias(test, "sampled");
        // Assert.fail() raises an AssertionError, which the catch (Exception)
        // below does not intercept, so reaching this line fails the test.
        Assert.fail( "Testcase should fail");
    } catch (Exception ex) {
        // An exception without a message must fail the assertion rather than
        // abort the test with a NullPointerException.
        String message = ex.getMessage();
        Assert.assertTrue(message != null
            && message.contains("Expect the type of the weight field of the input tuple to be of ([int, long, float, double]), but instead found (chararray), weight field: 0"));
    }
}
Example 12
Source File: BagTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code distinctByDelimTest} script over a bag whose two tuples share
 * the first field {@code a-b} and expects alias {@code data2} to return the bag
 * unchanged.
 */
@Test
public void distinctByDelimTest() throws Exception {
    PigTest pigTest = createPigTestFromString(distinctByDelimTest);

    writeLinesToFile("input", "({(a-b,c),(a-b,d)})");

    pigTest.runScript();

    assertOutput(pigTest, "data2", "({(a-b,c),(a-b,d)})");
}
Example 13
Source File: LSHPigTest.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Generates {@code n} vectors from a generator seeded with 0, writes them via
 * {@code getSparseLines}, runs the {@code sparseVectorTest} script and checks
 * that each tuple of alias {@code PTS} converts back to a vector equal to the
 * original within 1e-5 per entry.
 */
@Test
public void testSparseVectors() throws IOException, ParseException {
    // Fixed seed so the generated vectors are the same on every run.
    RandomGenerator generator = new JDKRandomGenerator();
    generator.setSeed(0);
    RandomData randomData = new RandomDataImpl(generator);

    int n = 20;
    List<RealVector> vectors = LSHTest.getVectors(randomData, 1000, n);

    PigTest pigTest = createPigTestFromString(sparseVectorTest);
    writeLinesToFile("input", getSparseLines(vectors));
    pigTest.runScript();

    List<Tuple> neighbors = this.getLinesForAlias(pigTest, "PTS");
    Assert.assertEquals(neighbors.size(), n);

    // Output rows are compared positionally against the generated vectors.
    int position = 0;
    for (Tuple row : neighbors) {
        Assert.assertTrue(row.get(0) instanceof DataBag);
        Assert.assertEquals(row.size(), 1);

        RealVector interpreted = DataTypeUtil.INSTANCE.convert(row, 3);
        RealVector original = vectors.get(position);
        Assert.assertEquals(original.getDimension(), interpreted.getDimension());

        int i = 0;
        while (i < interpreted.getDimension()) {
            double originalField = original.getEntry(i);
            double interpretedField = interpreted.getEntry(i);
            double delta = Math.abs(originalField - interpretedField);
            Assert.assertTrue(delta < 1e-5);
            i++;
        }

        position++;
    }
}
Example 14
Source File: EmpiricalCountEntropyTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code entropy} script over ten copies of the same value and checks
 * alias {@code data_out} against an entropy of 0 computed in R.
 */
@Test
public void singleValEntropyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(entropy);

    writeLinesToFile(
        "input",
        "98.94791", "98.94791", "98.94791", "98.94791", "98.94791",
        "98.94791", "98.94791", "98.94791", "98.94791", "98.94791");

    pigTest.runScript();

    // Reference value computed in R. The single distinct value has count 10:
    //   > v=c(98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791)
    //   > table(v)
    //   v
    //   98.94791
    //         10
    //   > count=(10)
    //   > library(entropy)
    //   > entropy(count)
    //   [1] 0
    List<Double> expected = new ArrayList<Double>();
    expected.add(0.0);

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 15
Source File: ZipBagsTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code duplicateAliasTest} script over one line holding two
 * tab-separated bags; the test passes only if a {@code FrontendException}
 * propagates out of the method.
 */
@Test(expectedExceptions = FrontendException.class)
public void duplicateAliasTest() throws Exception {
    PigTest pigTest = createPigTestFromString(duplicateAliasTest);

    writeLinesToFile(
        "input",
        "{(1,2),(3,4),(5,6)}\t{(7,8),(9,10),(11,12)}");

    pigTest.runScript();
}
Example 16
Source File: BagTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code distinctByTest} script over two input lines and expects alias
 * {@code data2} to contain, for each line, the bag reduced to the tuples listed
 * in the assertion below.
 */
@Test
public void distinctByTest() throws Exception {
    PigTest pigTest = createPigTestFromString(distinctByTest);

    writeLinesToFile(
        "input",
        "({(Z,1,0),(A,1,0),(A,1,0),(B,2,0),(B,22,1),(C,3,0),(D,4,0),(E,5,0)})",
        "({(A,10,2),(M,50,3),(A,34,49), (A,24,42), (Z,49,22),(B,1,1)},(B,2,2))");

    pigTest.runScript();

    assertOutput(
        pigTest,
        "data2",
        "({(Z,1,0),(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})",
        "({(A,10,2),(M,50,3),(Z,49,22),(B,1,1)})");
}
Example 17
Source File: ChaoShenEntropyTests.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Runs the {@code entropy} script with {@code type=chaosh} and {@code base=log}
 * against an input file written with no lines, and verifies alias
 * {@code data_out} against an empty expected list.
 */
@Test
public void emptyInputBagChaoShenEntropoyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log");

    // No line arguments: the input file is written without any content lines.
    writeLinesToFile("input");

    pigTest.runScript();

    // R reference for an empty count vector:
    //   > v=c()
    //   > table(v)
    //   < table of extent 0 >
    //   > count=c()
    //   > library(entropy)
    //   > entropy(count,count/sum(count),c("CS"),c("log"))
    //   [1] 0
    // The expected list is nevertheless left empty, i.e. it is compared as-is
    // against whatever rows data_out yields.
    List<Double> expected = new ArrayList<Double>();

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 18
Source File: SamplingTests.java From datafu with Apache License 2.0 | 4 votes |
/**
 * Runs the {@code sampleByKeyMultipleKeyTest} script over tab-separated
 * three-field lines and expects alias {@code sampled} to contain exactly the
 * tuples listed in the assertion below.
 */
@Test
public void sampleByKeyMultipleKeyTest() throws Exception {
    PigTest pigTest = createPigTestFromString(sampleByKeyMultipleKeyTest);

    // Input lines, grouped by their first field for readability.
    writeLinesToFile("input",
        "A1\tB1\t1", "A1\tB1\t4", "A1\tB3\t4", "A1\tB4\t4",
        "A2\tB1\t4", "A2\tB2\t4",
        "A3\tB1\t3", "A3\tB1\t1", "A3\tB3\t77",
        "A4\tB1\t3", "A4\tB2\t3", "A4\tB3\t59", "A4\tB4\t29",
        "A5\tB1\t4",
        "A6\tB2\t3", "A6\tB2\t55", "A6\tB3\t1",
        "A7\tB1\t39", "A7\tB2\t27", "A7\tB3\t85",
        "A8\tB1\t4", "A8\tB2\t45",
        "A9\tB3\t92", "A9\tB3\t0", "A9\tB6\t42", "A9\tB5\t1",
        "A10\tB1\t7", "A10\tB2\t23", "A10\tB2\t1", "A10\tB2\t31", "A10\tB6\t41", "A10\tB7\t52");

    pigTest.runScript();

    assertOutput(pigTest, "sampled",
        "(A1,B1,1)", "(A1,B1,4)",
        "(A1,B4,4)",
        "(A2,B1,4)",
        "(A2,B2,4)",
        "(A3,B1,3)", "(A3,B1,1)",
        "(A4,B4,29)",
        "(A5,B1,4)",
        "(A6,B3,1)",
        "(A7,B1,39)",
        "(A8,B1,4)",
        "(A9,B3,92)", "(A9,B3,0)",
        "(A10,B2,23)", "(A10,B2,1)", "(A10,B2,31)");
}
Example 19
Source File: ChaoShenEntropyTests.java From datafu with Apache License 2.0 | 4 votes |
/**
 * Runs the {@code entropy} script with {@code type=chaosh} and {@code base=log2}
 * over ten values containing repeats and checks alias {@code data_out} against
 * an entropy of 3.713915 computed in R.
 */
@Test
public void dupValChaoShenEntropoyLog2Test() throws Exception {
    PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log2");

    writeLinesToFile("input",
        "98.94791",
        "38.61010",
        "97.10575",
        "62.28313",
        "38.61010",
        "32.05370",
        "96.10962",
        "38.61010",
        "96.10962",
        "20.41135");

    pigTest.runScript();

    // Reference value computed in R. The input has seven distinct values:
    //   > v=c(98.94791,38.61010,97.10575,62.28313,38.61010,32.05370,96.10962,38.61010,96.10962,20.41135)
    //   > table(v)
    //   v
    //   20.41135  32.0537  38.6101 62.28313 96.10962 97.10575 98.94791
    //          1        1        3        1        2        1        1
    //   > count=c(1,1,3,1,2,1,1)
    //   > freqs=count/sum(count)
    //   > library(entropy)
    //   > entropy(count,count/sum(count),c("CS"),c("log2"))
    //   [1] 3.713915
    List<Double> expected = new ArrayList<Double>();
    expected.add(3.713915);

    List<Tuple> actual = this.getLinesForAlias(pigTest, "data_out");
    verifyEqualEntropyOutput(expected, actual, 5);
}
Example 20
Source File: WilsonBinConfTests.java From datafu with Apache License 2.0 | 4 votes |
@Test public void wilsonTest() throws Exception { PigTest test = createPigTestFromString(wilsonBinConf, "alpha=0.05"); // alpha is 0.05 for 95% confidence writeLinesToFile("input", "1\t1", "1\t2", "50\t100", "500\t1000", "999\t1000", "1000\t1000", "998\t1000"); test.runScript(); /* Add expected values, computed using R: * * e.g. * * library(Hmisc) * * binconf(50,100) * binconf(500,1000) * */ List<String> expectedOutput = new ArrayList<String>(); expectedOutput.add("0.05129,1.00000"); expectedOutput.add("0.02565,0.97435"); expectedOutput.add("0.40383,0.59617"); expectedOutput.add("0.46907,0.53093"); expectedOutput.add("0.99436,0.99995"); expectedOutput.add("0.99617,1.00000"); expectedOutput.add("0.99274,0.99945"); List<Tuple> output = this.getLinesForAlias(test, "data_out"); Iterator<String> expectationIterator = expectedOutput.iterator(); for (Tuple t : output) { assertTrue(expectationIterator.hasNext()); Double lower = (Double)t.get(0); Double upper = (Double)t.get(1); assertEquals(String.format("%.5f,%.5f",lower,upper),expectationIterator.next()); } }