org.apache.pig.impl.logicalLayer.schema.Schema Java Examples
The following examples show how to use
org.apache.pig.impl.logicalLayer.schema.Schema.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestResourceSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * One-level Pig schema where a bag field directly carries multiple fields.
 * Building a ResourceSchema from it and converting that back with
 * Schema.getPigSchema must raise a FrontendException with error code 2218.
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() throws FrontendException {
    String[] fieldNames = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};
    Schema innerSchema = TypeCheckingTestUtil.genFlatSchema(fieldNames, fieldTypes);
    Schema bagSchema = new Schema(new Schema.FieldSchema("f0", innerSchema, DataType.BAG));

    try {
        ResourceSchema resourceSchema = new ResourceSchema(bagSchema);
        Schema.getPigSchema(resourceSchema);
        // Reaching this line means no exception was raised.
        Assert.fail();
    } catch (FrontendException expected) {
        assertTrue(expected.getErrorCode() == 2218);
    }
}
Example #2
Source File: TestEvalPipeline2.java From spork with Apache License 2.0 | 6 votes |
@Test public void testDescribeNestedAlias() throws Exception{ String[] input = { "1\t3", "2\t4", "3\t5" }; Util.createInputFile(cluster, "table_testDescribeNestedAlias", input); pigServer.registerQuery("A = LOAD 'table_testDescribeNestedAlias' as (a0, a1);"); pigServer.registerQuery("P = GROUP A by a1;"); // Test RelationalOperator pigServer.registerQuery("B = FOREACH P { D = ORDER A by $0; generate group, D.$0; };"); // Test ExpressionOperator - negative test case pigServer.registerQuery("C = FOREACH A { D = a0/a1; E=a1/a0; generate E as newcol; };"); Schema schema = pigServer.dumpSchemaNested("B", "D"); Assert.assertTrue(schema.toString().equalsIgnoreCase("{a0: bytearray,a1: bytearray}")); try { schema = pigServer.dumpSchemaNested("C", "E"); } catch (FrontendException e) { Assert.assertTrue(e.getErrorCode() == 1113); } }
Example #3
Source File: AliasEvalFuncTest.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Declares a bag-of-tuples output schema on a ReportBuilder, runs it on a
 * tuple holding a single-row bag and expects the returned bag to equal the
 * input bag.
 */
@Test
public void getBagTest() throws Exception {
    ReportBuilder udf = new ReportBuilder();
    udf.setUDFContextSignature("test");

    // Schema of the tuples stored in the bag.
    List<Schema.FieldSchema> tupleFields = new ArrayList<Schema.FieldSchema>();
    tupleFields.add(new Schema.FieldSchema("msisdn", DataType.LONG));
    tupleFields.add(new Schema.FieldSchema("ts", DataType.INTEGER));
    tupleFields.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
    tupleFields.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
    Schema tupleSchema = new Schema(tupleFields);

    Schema.FieldSchema bagField =
            new Schema.FieldSchema(ReportBuilder.ORDERED_ROUTES, tupleSchema, DataType.BAG);
    udf.outputSchema(new Schema(bagField));

    Tuple argument = TupleFactory.getInstance().newTuple();
    DataBag routes = BagFactory.getInstance().newDefaultBag();
    routes.add(TupleFactory.getInstance().newTuple(
            Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
    argument.append(routes);

    DataBag result = udf.exec(argument);

    Assert.assertEquals(routes, result);
}
Example #4
Source File: GroovyEvalFunc.java From spork with Apache License 2.0 | 6 votes |
@Override public Schema outputSchema(Schema input) { if (null != this.schemaFunction) { try { Tuple t = TupleFactory.getInstance().newTuple(1); // Strip enclosing '{}' from schema t.set(0, input.toString().replaceAll("^\\{", "").replaceAll("\\}$", "")); return Utils.getSchemaFromString((String) this.schemaFunction.exec(t)); } catch (ParserException pe) { throw new RuntimeException(pe); } catch (IOException ioe) { throw new RuntimeException(ioe); } } else { return this.schema; } }
Example #5
Source File: TestLimitSchemaStore.java From spork with Apache License 2.0 | 6 votes |
@Test //end to end test public void testLimitStoreSchema1() throws Exception{ Util.createLocalInputFile("student", new String[]{"joe smith:18:3.5","amy brown:25:2.5","jim fox:20:4.0","leo fu:55:3.0"}); pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);"); pigServer.registerQuery("d = distinct a;"); pigServer.registerQuery("lim = limit d 1;"); String outFile = "limitSchemaOut"; Util.deleteDirectory(new File(outFile)); pigServer.store("lim", outFile, "PigStorage('\\t', '-schema')"); pigServer.dumpSchema("lim"); pigServer.registerQuery("b = LOAD '" + outFile + "' using PigStorage('\\t', '-schema');"); Schema genSchema = pigServer.dumpSchema("b"); System.err.println(genSchema); Assert.assertNotNull(genSchema); }
Example #6
Source File: ScorePMML_ElNinoTest.java From Surus with Apache License 2.0 | 6 votes |
private Schema buildElNinoInputSchema() throws FrontendException { // Build Field Schema List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(); fieldSchemas.add(new Schema.FieldSchema("buoy_day_ID", DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("buoy" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("day" , DataType.CHARARRAY)); fieldSchemas.add(new Schema.FieldSchema("latitude" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("longitude" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("zon_winds" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("mer_winds" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("humidity" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("airtemp" , DataType.DOUBLE )); fieldSchemas.add(new Schema.FieldSchema("s_s_temp" , DataType.DOUBLE )); return new Schema(fieldSchemas); }
Example #7
Source File: VespaDocumentOperation.java From vespa with Apache License 2.0 | 6 votes |
@SuppressWarnings("unchecked") private static void writeField(String name, Object value, Byte type, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth) throws IOException { if (shouldWriteField(name, properties, depth)) { String operation = getPartialOperation(mapPartialOperationMap, name, properties); // check if the name has the property update-map-fields/remove-map-fields // if yes, we need special treatments here as we need to loop through the tuple // be aware the the operation here is not vespa operation such as "put" and "update" // operation here are the field name we wish use to such as "assign" and "remove" if (operation != null) { writePartialUpdateAndRemoveMap(name, value, g, properties, schema, op, depth, operation); } else { g.writeFieldName(name); if (shouldWritePartialUpdate(op, depth)) { writePartialUpdate(value, type, g, name, properties, schema, op, depth); } else { writeValue(value, type, g, name, properties, schema, op, depth); } } } }
Example #8
Source File: TestOrderBy3.java From spork with Apache License 2.0 | 6 votes |
/**
 * Creates a local PigServer, resets its data store and registers MAX tuples
 * of (index, random string) under the name "test" with a matching
 * two-column schema.
 */
@Before
public void setUp() throws Exception {
    ArrayList<Tuple> rows = new ArrayList<Tuple>();
    log.info("Setting up");

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    Random random = new Random();
    int index = 0;
    while (index < MAX) {
        rows.add(tuple(index, GenRandomData.genRandString(random)));
        index++;
    }

    Schema rowSchema = new Schema();
    rowSchema.add(new Schema.FieldSchema("index", DataType.INTEGER));
    rowSchema.add(new Schema.FieldSchema("name", DataType.CHARARRAY));

    data.set("test", rowSchema, rows);
}
Example #9
Source File: TestThriftToPigCompatibility.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Steps:
 * <ul>
 * <li>Writes using the thrift mapping</li>
 * <li>Reads using the pig mapping</li>
 * <li>Uses Elephant Bird to convert from thrift to pig</li>
 * <li>Checks that both transformations give the same result</li>
 * </ul>
 *
 * @param o the object to convert
 * @throws TException
 */
public static <T extends TBase<?,?>> void validateSameTupleAsEB(T o) throws TException {
    final ThriftSchemaConverter converter = new ThriftSchemaConverter();
    @SuppressWarnings("unchecked")
    final Class<T> thriftClass = (Class<T>) o.getClass();
    final MessageType parquetSchema = converter.convert(thriftClass);
    final StructType thriftStruct = ThriftSchemaConverter.toStructType(thriftClass);
    final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(thriftClass);
    final Schema pigSchema = thriftToPig.toSchema();

    // Write the object through the Parquet write protocol into a Pig tuple materializer.
    final TupleRecordMaterializer materializer =
            new TupleRecordMaterializer(parquetSchema, pigSchema, true);
    RecordConsumer consumer =
            new ConverterConsumer(materializer.getRootConverter(), parquetSchema);
    final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(parquetSchema);
    ParquetWriteProtocol protocol = new ParquetWriteProtocol(
            new RecordConsumerLoggingWrapper(consumer), columnIO, thriftStruct);
    o.write(protocol);

    // Both conversion paths must yield the same tuple text.
    final Tuple actual = materializer.getCurrentRecord();
    final Tuple expected = thriftToPig.getPigTuple(o);
    assertEquals(expected.toString(), actual.toString());

    // Filtering the Parquet schema by the Pig schema must leave it unchanged.
    final MessageType filtered = new PigSchemaConverter().filter(parquetSchema, pigSchema);
    assertEquals(parquetSchema.toString(), filtered.toString());
}
Example #10
Source File: XPath.java From spork with Apache License 2.0 | 6 votes |
/**
 * Declares the two accepted argument lists for this function: two chararray
 * arguments, or two chararray arguments followed by a boolean.
 *
 * @return one FuncSpec per accepted argument schema, both naming this class
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    final List<FuncSpec> mappings = new ArrayList<FuncSpec>();

    // Either two chararray arguments ...
    List<FieldSchema> arguments = new ArrayList<FieldSchema>();
    arguments.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    arguments.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    Schema twoArgumentSchema = new Schema(arguments);
    mappings.add(new FuncSpec(this.getClass().getName(), twoArgumentSchema));

    // ... or two chararray arguments and a boolean argument.
    arguments = new ArrayList<FieldSchema>();
    arguments.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    arguments.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    arguments.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
    Schema threeArgumentSchema = new Schema(arguments);
    mappings.add(new FuncSpec(this.getClass().getName(), threeArgumentSchema));

    return mappings;
}
Example #11
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Keeps the fields of {@code schemaToFilter} that are named by the requested
 * Pig schema, filtering each kept field against its Pig field schema.
 *
 * The lookup name of each Pig field comes from the name(...) helper, given
 * the field alias and "field_" plus the field position.
 *
 * @param schemaToFilter group type to take fields from
 * @param requestedPigSchema Pig schema listing the wanted fields
 * @param requiredFieldsList not used by this method
 * @return the filtered types, in the order of the Pig fields that matched
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
    List<FieldSchema> requested = requestedPigSchema.getFields();
    List<Type> kept = new ArrayList<Type>();
    int position = 0;
    for (FieldSchema pigField : requested) {
        String columnName = name(pigField.alias, "field_" + position);
        position++;
        if (!schemaToFilter.containsField(columnName)) {
            continue;
        }
        kept.add(filter(schemaToFilter.getType(columnName), pigField));
    }
    return kept;
}
Example #12
Source File: TestMergeJoin.java From spork with Apache License 2.0 | 5 votes |
/**
 * Joins two schema-less loads of the same input, first with 'merge' and then
 * with the default join, dumping the schema of the result each time. The
 * schema of the default join is expected to be null.
 */
@Test
public void testMergeJoinSch2() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");

    // NOTE(review): the merge-join schema is dumped but never asserted on - confirm intent.
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using 'merge';");
    pigServer.dumpSchema("C");

    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
    Schema defaultJoinSchema = pigServer.dumpSchema("C");

    Assert.assertTrue(null == defaultJoinSchema);
}
Example #13
Source File: TestUnionOnSchema.java From spork with Apache License 2.0 | 5 votes |
/** * Test UNION ONSCHEMA where a common column has additional 'namespace' part * in the column name in one of the inputs * @throws IOException * @throws ParserException */ @Test public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException { PigServer pig = new PigServer(ExecType.LOCAL); String query_prefix = " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " + "g = group l1 by i; " + "f = foreach g generate flatten(l1); " + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "; String query = query_prefix + "u = union onschema f, l2; " ; Util.registerMultiLineQuery(pig, query); Schema sch = pig.dumpSchema("u"); Schema expectedSch = Utils.getSchemaFromString("i: int, j: int"); assertEquals("Checking expected schema",sch, expectedSch); Iterator<Tuple> it = pig.openIterator("u"); List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings( new String[] { "(1,2)", "(5,3)", "(1,2)", "(5,3)" }); Util.checkQueryOutputsAfterSort(it, expectedRes); // now try reversing the order of relation query = query_prefix + "u = union onschema l2, f; " ; Util.registerMultiLineQuery(pig, query); sch = pig.dumpSchema("u"); expectedSch = Utils.getSchemaFromString("i: int, j: int"); assertEquals("Checking expected schema",sch, expectedSch); it = pig.openIterator("u"); Util.checkQueryOutputsAfterSort(it, expectedRes); }
Example #14
Source File: TupleDiff.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Looks up the field at position {@code fieldNum} inside the nested schema
 * of {@code fieldSchema}.
 *
 * @param fieldSchema field whose nested schema is inspected; may be null
 * @param fieldNum zero-based position of the wanted nested field
 * @return the nested field schema, or null when {@code fieldSchema} is null,
 *         carries no nested schema, or has fewer than {@code fieldNum + 1}
 *         nested fields
 * @throws ExecException declared by the original signature
 * @throws FrontendException if the nested schema lookup fails
 */
private FieldSchema getFieldSchema(FieldSchema fieldSchema, int fieldNum) throws ExecException, FrontendException {
    if (fieldSchema == null) {
        return null;
    }

    Schema schema = fieldSchema.schema;
    // A field without a nested schema has nothing to look up; report it the
    // same way as a missing field instead of failing with a NullPointerException.
    if (schema == null) {
        return null;
    }

    return schema.size() < (fieldNum + 1) ? null : schema.getField(fieldNum);
}
Example #15
Source File: GetSecond.java From spork with Apache License 2.0 | 5 votes |
/**
 * Declares the single accepted argument list for this function: one
 * datetime argument.
 *
 * @return a list holding one FuncSpec that names this class
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema datetimeArgument = new Schema.FieldSchema(null, DataType.DATETIME);
    Schema argumentSchema = new Schema(datetimeArgument);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), argumentSchema));
    return mappings;
}
Example #16
Source File: ISOToSecond.java From spork with Apache License 2.0 | 5 votes |
/**
 * Declares the single accepted argument list for this function: one
 * chararray argument.
 *
 * @return a list holding one FuncSpec that names this class
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema textArgument = new Schema.FieldSchema(null, DataType.CHARARRAY);
    Schema argumentSchema = new Schema(textArgument);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), argumentSchema));
    return mappings;
}
Example #17
Source File: TestSecondarySort.java From spork with Apache License 2.0 | 5 votes |
/**
 * End-to-end check of a nested LIMIT followed by a nested ORDER BY inside a
 * FOREACH over grouped data, run against input copied to the cluster.
 */
@Test
public void testNestedSortEndToEnd1() throws Exception {
    File inputFile = Util.createTempFileDelOnExit("test", "txt");
    String[] inputRows = {
        "1\t2\t3",
        "1\t3\t4",
        "1\t2\t4",
        "1\t2\t4",
        "1\t2\t4",
        "2\t3\t4"
    };
    PrintStream writer = new PrintStream(new FileOutputStream(inputFile));
    for (String row : inputRows) {
        writer.println(row);
    }
    writer.close();

    String expected[] = {
        "(2,{(2,3,4)})",
        "(1,{(1,2,3),(1,2,4),(1,2,4),(1,2,4),(1,3,4)})"
    };

    String clusterPath = Util.removeColon(inputFile.getCanonicalPath());
    Util.copyFromLocalToCluster(cluster, inputFile.getCanonicalPath(), clusterPath);

    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterPath) + "' AS (a0, a1, a2);");
    pigServer.registerQuery("B = group A by $0 parallel 2;");
    pigServer.registerQuery("C = foreach B { D = limit A 10; E = order D by $1; generate group, E;};");

    Iterator<Tuple> results = pigServer.openIterator("C");
    Schema resultSchema = pigServer.dumpSchema("C");
    Util.checkQueryOutputsAfterSortRecursive(results, expected,
            org.apache.pig.newplan.logical.Util.translateSchema(resultSchema));

    Util.deleteFile(cluster, clusterPath);
}
Example #18
Source File: ISODaysBetween.java From spork with Apache License 2.0 | 5 votes |
/**
 * Declares the single accepted argument list for this function: two
 * chararray arguments.
 *
 * @return a list holding one FuncSpec that names this class
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argumentSchema = new Schema();
    for (int argument = 0; argument < 2; argument++) {
        argumentSchema.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), argumentSchema));
    return mappings;
}
Example #19
Source File: TestTypedMap.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads a single untyped map column and checks both the reported schema and
 * the loaded value: one tuple with one Map field whose two entries are
 * DataByteArray values rendering as "1" and "2".
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);

    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    // assertEquals takes (message, expected, actual); the expected literal goes
    // first so a failure reports the two values under the right labels.
    assertEquals("Checking expected schema", "{m: map[]}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("a");
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size() == 1);

    Object field = t.get(0);
    Assert.assertTrue(field instanceof Map);
    // Cast once, after the instanceof assertion, instead of a raw cast per check.
    Map<?, ?> map = (Map<?, ?>) field;
    Assert.assertTrue(map.containsKey("key"));
    Assert.assertTrue(map.containsKey("key2"));
    Assert.assertTrue(map.get("key") instanceof DataByteArray);
    Assert.assertTrue(map.get("key").toString().equals("1"));
    Assert.assertTrue(map.get("key2") instanceof DataByteArray);
    Assert.assertTrue(map.get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
Example #20
Source File: ExampleEasyCubeAggregator.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Describes the aggregator output: a tuple field named "resultsTuple"
 * holding two long fields, "sum" and "sum_squared".
 *
 * @param inputSchema not used by this method
 * @return the field schema of the result tuple
 */
@Override
public FieldSchema outputSchema(Schema inputSchema) throws IOException {
    String[] resultColumns = {"sum", "sum_squared"};

    List<FieldSchema> tupleFields = new ArrayList<FieldSchema>();
    for (String column : resultColumns) {
        tupleFields.add(new FieldSchema(column, DataType.LONG));
    }

    Schema tupleSchema = new Schema(tupleFields);
    return new FieldSchema("resultsTuple", tupleSchema, DataType.TUPLE);
}
Example #21
Source File: ROUND.java From spork with Apache License 2.0 | 5 votes |
/**
 * Maps each accepted argument type to the class that handles it: bytearray
 * to this class, double to DoubleRound and float to FloatRound.
 *
 * @return the three FuncSpec entries, in that order
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema bytearrayArgument = new Schema(new Schema.FieldSchema(null, DataType.BYTEARRAY));
    Schema doubleArgument = new Schema(new Schema.FieldSchema(null, DataType.DOUBLE));
    Schema floatArgument = new Schema(new Schema.FieldSchema(null, DataType.FLOAT));

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), bytearrayArgument));
    mappings.add(new FuncSpec(DoubleRound.class.getName(), doubleArgument));
    mappings.add(new FuncSpec(FloatRound.class.getName(), floatArgument));
    return mappings;
}
Example #22
Source File: RubySchema.java From spork with Apache License 2.0 | 5 votes |
/** * This method will fix any name conflicts in a schema. It's important to note that * this will change the Schema object itself. It will deal with any collisions in things * named tuple_#, bag_#, map_#, or val_#, as these are generally names generated by * Util.getSchemaFromString. In the case of another name conflict, it will not be * changed, as that name conflict was created by the user. * * @param s a Schema object to fix in place */ private static void fixSchemaNames(Schema s) { if (s == null) return; // This regex detects names that could possibly collide that we should change Pattern p = Pattern.compile("(bag_|tuple_|map_|val_)(\\d+)", Pattern.CASE_INSENSITIVE); Set<String> names = new HashSet<String>(s.size(), 1.0f); for (Schema.FieldSchema fs : s.getFields()) { if (fs.alias == null) continue; Matcher m = p.matcher(fs.alias); if (m.matches() && names.contains(fs.alias)) { String prefix = m.group(1); int suffix = Integer.parseInt(m.group(2)); while (names.contains(prefix + suffix)) suffix++; fs.alias = prefix + suffix; } names.add(fs.alias); if (fs.schema != null) { if (fs.type == DataType.BAG) { try { fixSchemaNames(fs.schema.getField(0).schema); } catch (FrontendException e) { throw new RuntimeException("Error recursively fixing schema: " + s, e); } } else { fixSchemaNames(fs.schema); } } } }
Example #23
Source File: ToMilliSeconds.java From spork with Apache License 2.0 | 5 votes |
/**
 * Declares the single accepted argument list for this function: one
 * datetime argument.
 *
 * @return a list holding one FuncSpec that names this class
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argumentSchema = new Schema(new Schema.FieldSchema(null, DataType.DATETIME));
    FuncSpec datetimeMapping = new FuncSpec(this.getClass().getName(), argumentSchema);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(datetimeMapping);
    return mappings;
}
Example #24
Source File: TupleReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public ReadContext init(InitContext initContext) { Schema pigSchema = getPigSchema(initContext.getConfiguration()); RequiredFieldList requiredFields = getRequiredFields(initContext.getConfiguration()); boolean columnIndexAccess = initContext.getConfiguration().getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false); if (pigSchema == null) { return new ReadContext(initContext.getFileSchema()); } else { // project the file schema according to the requested Pig schema MessageType parquetRequestedSchema = new PigSchemaConverter(columnIndexAccess).filter(initContext.getFileSchema(), pigSchema, requiredFields); return new ReadContext(parquetRequestedSchema); } }
Example #25
Source File: TestProjectRange.java From spork with Apache License 2.0 | 5 votes |
@Test public void testRangeOrderByMixNOSchema() throws IOException, ParserException{ String query; query = " l1 = load '" + INP_FILE_5FIELDS + "';" + " o = order l1 by $1 .. $2 DESC, $0 , $4 .. DESC;" ; compileAndCompareSchema((Schema)null, query, "o"); //check number of sort expression plans LogicalPlan lp = createAndProcessLPlan(query); boolean[] isAsc = {false, false,true,false}; checkNumExpressionPlansForSort(lp, 4, isAsc); Util.registerMultiLineQuery(pigServer, query); pigServer.explain("o", System.err); Iterator<Tuple> it = pigServer.openIterator("o"); List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings( new String[] { "(11,21,31,41,51)", "(10,20,30,40,50)", }); Util.checkQueryOutputs(it, expectedRes); }
Example #26
Source File: StyTest.java From validatar with Apache License 2.0 | 5 votes |
/**
 * Runs a query against a mocked server whose schema declares column "a" as
 * DataType.NULL. The query must not fail, and column "a" must hold exactly
 * one value, which is null.
 */
@Test
public void testNullTypeInTuple() throws IOException {
    Query query = new Query();
    query.value = "";

    Schema mockedSchema = getSchema(makeFieldSchema("a", DataType.NULL));
    Tuple mockedTuple = makeTuple("something");
    sty = getSty(withMockResult(withMockSchema(getServer(), mockedSchema), mockedTuple));

    runWithoutOutput(() -> sty.execute(query));

    Assert.assertFalse(query.failed());
    List<TypedObject> columnValues = query.getResult().getColumn("a").getValues();
    Assert.assertNotNull(columnValues);
    Assert.assertEquals(columnValues.size(), 1);
    Assert.assertNull(columnValues.get(0));
}
Example #27
Source File: TypeCheckingTestUtil.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a flat schema by pairing each alias with the type at the same index.
 *
 * @param aliases field aliases
 * @param types field types, one per alias
 * @return a schema with one field per alias, in array order
 * @throws AssertionError if the two arrays differ in length
 */
public static Schema genFlatSchema(String[] aliases, byte[] types) {
    final int fieldCount = aliases.length;
    if (fieldCount != types.length) {
        throw new AssertionError(" aliase number and type number don't match");
    }

    List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
    int index = 0;
    while (index < fieldCount) {
        fields.add(new Schema.FieldSchema(aliases[index], types[index]));
        index++;
    }
    return new Schema(fields);
}
Example #28
Source File: MapSummaryData.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Records one map value in this summary: the map itself, its size, and every
 * key and value it contains.
 *
 * The key and value summaries are created when the first non-empty map
 * arrives while no key summary exists yet.
 *
 * @param schema schema from which the field at position 0 is taken
 * @param m the map to summarize
 */
public void add(Schema schema, Map<?, ?> m) {
    super.add(m);
    size.add(m.size());

    FieldSchema mapField = getField(schema, 0);
    boolean needsSummaries = key == null && m.size() > 0;
    if (needsSummaries) {
        key = new FieldSummaryData();
        key.setName(getName(mapField));
        value = new FieldSummaryData();
        value.setName(getName(mapField));
    }

    for (Map.Entry<?, ?> pair : m.entrySet()) {
        // Keys are summarized without a schema; values use the schema of the map field.
        key.add(null, pair.getKey());
        value.add(getSchema(mapField), pair.getValue());
    }
}
Example #29
Source File: TestSchemaUtil.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks the textual form of schemas built by every SchemaUtil.newBagSchema
 * overload: with explicit bag and tuple names, with field names only, and
 * with data types only, each in its array and its List variant.
 */
@Test
public void testBagSchema() throws Exception {
    String bagName = "mybag";
    String tupleName = "mytuple";
    String[] fieldNames = new String[] { "field_0", "field_1" };
    Byte[] dataTypes = new Byte[] { DataType.LONG, DataType.CHARARRAY };

    // Explicit bag and tuple names.
    String expected = "{mybag: {mytuple: (field_0: long,field_1: chararray)}}";
    Schema bagSchema = SchemaUtil.newBagSchema(bagName, tupleName, fieldNames, dataTypes);
    assertEquals(expected, bagSchema.toString());
    bagSchema = SchemaUtil.newBagSchema(
            bagName, tupleName, Arrays.asList(fieldNames), Arrays.asList(dataTypes));
    assertEquals(expected, bagSchema.toString());

    // Field names only.
    expected = "{b: {t: (field_0: long,field_1: chararray)}}";
    bagSchema = SchemaUtil.newBagSchema(fieldNames, dataTypes);
    assertEquals(expected, bagSchema.toString());
    bagSchema = SchemaUtil.newBagSchema(Arrays.asList(fieldNames), Arrays.asList(dataTypes));
    assertEquals(expected, bagSchema.toString());

    // Data types only.
    expected = "{b: {t: (f0: long,f1: chararray)}}";
    bagSchema = SchemaUtil.newBagSchema(dataTypes);
    assertEquals(expected, bagSchema.toString());
    bagSchema = SchemaUtil.newBagSchema(Arrays.asList(dataTypes));
    assertEquals(expected, bagSchema.toString());
}
Example #30
Source File: Entropy.java From datafu with Apache License 2.0 | 5 votes |
/**
 * Validates that the input is a bag of tuples and declares a single double
 * output field.
 *
 * The output field name comes from getSchemaName, given this class's
 * lower-cased name and the input schema.
 *
 * @param input schema of the function input
 * @return a schema with one DOUBLE field
 * @throws RuntimeException if the first input field is not a BAG, if the
 *         first field of that bag is not a TUPLE, or if a schema lookup
 *         raises a FrontendException
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema.FieldSchema bagElement = bagField.schema.getField(0);
        if (bagElement.type != DataType.TUPLE) {
            String foundType = DataType.findTypeName(bagElement.type);
            throw new RuntimeException(
                    String.format("Expected input bag to contain a TUPLE, but instead found %s", foundType));
        }

        String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(outputName, DataType.DOUBLE));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}