org.apache.pig.ResourceSchema Java Examples
The following examples show how to use
org.apache.pig.ResourceSchema.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestResourceSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks that an invalid resource schema is rejected: a bag whose inner
 * schema lists several fields directly.
 *
 * <p>No explicit conversion call is made here, so the expected
 * {@link FrontendException} has to surface while the field schemas are being
 * assembled (presumably from {@code setSchema} on the BAG field; confirm
 * against ResourceFieldSchema).
 *
 * @throws IOException propagated from the schema-building calls
 */
@Test(expected = FrontendException.class)
public void testToPigSchemaWithInvalidSchema() throws IOException {
    ResourceFieldSchema charField = new ResourceFieldSchema().setName("fld0").setType(DataType.CHARARRAY);
    ResourceFieldSchema doubleField = new ResourceFieldSchema().setName("fld1").setType(DataType.DOUBLE);
    ResourceFieldSchema intField = new ResourceFieldSchema().setName("fld2").setType(DataType.INTEGER);
    ResourceFieldSchema[] level0 = { charField, doubleField, intField };

    ResourceSchema rSchema0 = new ResourceSchema().setFields(level0);

    // Attach the three-field schema straight to a BAG-typed field.
    ResourceFieldSchema bagField = new ResourceFieldSchema()
            .setName("t2")
            .setType(DataType.BAG)
            .setSchema(rSchema0);
    ResourceFieldSchema[] level2 = { bagField };
}
Example #2
Source File: CSVExcelStorage.java From spork with Apache License 2.0 | 6 votes |
public void prepareToWrite(RecordWriter writer) { // Get the schema string from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfContextSignature }); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema != null) { // Parse the schema from the string stored in the properties object. try { schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); } catch (ParserException pex) { logger.warn("Could not parse schema for storing."); } } if (headerTreatment == Headers.DEFAULT) { headerTreatment = Headers.SKIP_OUTPUT_HEADER; } // PigStorage's prepareToWrite() super.prepareToWrite(writer); }
Example #3
Source File: FixedWidthLoader.java From spork with Apache License 2.0 | 6 votes |
@Override public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { // Save reader to use in getNext() this.reader = reader; splitIndex = split.getSplitIndex(); // Get schema from front-end UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature }); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema == null) { throw new IOException("Could not find schema in UDF context"); } schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); requiredFields = (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_SIGNATURE)); if (requiredFields != null) { numRequiredFields = 0; for (int i = 0; i < requiredFields.length; i++) { if (requiredFields[i]) numRequiredFields++; } } }
Example #4
Source File: SchemaUtilTest.java From iceberg with Apache License 2.0 | 6 votes |
@Test public void testTupleInMap() throws IOException { Schema icebergSchema = new Schema( optional( 1, "nested_list", MapType.ofOptional( 2, 3, StringType.get(), ListType.ofOptional( 4, StructType.of( required(5, "id", LongType.get()), optional(6, "data", StringType.get())))))); ResourceSchema pigSchema = SchemaUtil.convert(icebergSchema); // The output should contain a nested struct within a list within a map, I think. assertEquals("nested_list:[{(id:long,data:chararray)}]", pigSchema.toString()); }
Example #5
Source File: SchemaUtilTest.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Converts an Iceberg schema made of eight optional primitive columns and
 * checks the string form of the resulting Pig schema.
 *
 * @throws IOException propagated from the conversion
 */
@Test
public void testPrimitive() throws IOException {
    Schema icebergSchema = new Schema(
            optional(1, "b", BooleanType.get()),
            optional(2, "i", IntegerType.get()),
            optional(3, "l", LongType.get()),
            optional(4, "f", FloatType.get()),
            optional(5, "d", DoubleType.get()),
            optional(6, "dec", DecimalType.of(0, 2)),
            optional(7, "s", StringType.get()),
            optional(8, "bi", BinaryType.get()));

    String expected =
            "b:boolean,i:int,l:long,f:float,d:double,dec:bigdecimal,s:chararray,bi:bytearray";
    String actual = SchemaUtil.convert(icebergSchema).toString();

    assertEquals(expected, actual);
}
Example #6
Source File: SchemaUtilTest.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Converts an Iceberg schema made of eight optional primitive columns and
 * checks the string form of the resulting Pig schema.
 *
 * <p>Each column carries its own field ID (1 through 8); IDs must not be
 * shared between columns of one schema.
 *
 * @throws IOException propagated from the conversion
 */
@Test
public void testPrimitive() throws IOException {
    Schema icebergSchema = new Schema(
            optional(1, "b", BooleanType.get()),
            optional(2, "i", IntegerType.get()),
            optional(3, "l", LongType.get()),
            optional(4, "f", FloatType.get()),
            optional(5, "d", DoubleType.get()),
            optional(6, "dec", DecimalType.of(0, 2)),
            optional(7, "s", StringType.get()),
            optional(8, "bi", BinaryType.get())
    );

    ResourceSchema pigSchema = SchemaUtil.convert(icebergSchema);

    assertEquals(
            "b:boolean,i:int,l:long,f:float,d:double,dec:bigdecimal,s:chararray,bi:bytearray",
            pigSchema.toString());
}
Example #7
Source File: TestResourceSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks that an invalid resource schema is rejected: a bag whose inner
 * schema holds a plain chararray field rather than a tuple field.
 *
 * <p>No explicit conversion call is made here, so the expected
 * {@link FrontendException} has to surface while the field schemas are being
 * assembled (presumably from {@code setSchema} on the BAG field; confirm
 * against ResourceFieldSchema).
 *
 * @throws IOException propagated from the schema-building calls
 */
@Test(expected = FrontendException.class)
public void testToPigSchemaWithInvalidSchema2() throws IOException {
    ResourceFieldSchema onlyField = new ResourceFieldSchema().setName("fld0").setType(DataType.CHARARRAY);
    ResourceFieldSchema[] level0 = { onlyField };

    ResourceSchema rSchema0 = new ResourceSchema().setFields(level0);

    // Attach the non-tuple schema straight to a BAG-typed field.
    ResourceFieldSchema bagField = new ResourceFieldSchema()
            .setName("t2")
            .setType(DataType.BAG)
            .setSchema(rSchema0);
    ResourceFieldSchema[] level2 = { bagField };
}
Example #8
Source File: SchemaUtilTest.java From iceberg with Apache License 2.0 | 6 votes |
@Test public void testTupleInMap() throws IOException { Schema icebergSchema = new Schema( optional( 1, "nested_list", MapType.ofOptional( 2, 3, StringType.get(), ListType.ofOptional( 4, StructType.of( required(5, "id", LongType.get()), optional(6, "data", StringType.get())))))); ResourceSchema pigSchema = SchemaUtil.convert(icebergSchema); assertEquals("nested_list:[{(id:long,data:chararray)}]", pigSchema.toString()); // The output should contain a nested struct within a list within a map, I think. }
Example #9
Source File: AegisthusLoader.java From aegisthus with Apache License 2.0 | 6 votes |
/**
 * Builds a schema with a single TUPLE field named {@code column} whose inner
 * schema has five fields: name, value, ts, status and ttl.
 *
 * @return the one-field schema wrapping the column tuple
 * @throws IOException propagated from the schema setters
 */
protected ResourceSchema columnSchema() throws IOException {
    ResourceSchema result = new ResourceSchema();

    ResourceFieldSchema[] columnParts = {
        field("name", DataType.BYTEARRAY),
        field("value", DataType.BYTEARRAY),
        field("ts", DataType.LONG),
        field("status", DataType.CHARARRAY),
        field("ttl", DataType.LONG),
    };
    ResourceSchema tupleSchema = new ResourceSchema();
    tupleSchema.setFields(columnParts);

    ResourceFieldSchema column = new ResourceFieldSchema();
    column.setName("column");
    column.setType(DataType.TUPLE);
    column.setSchema(tupleSchema);

    result.setFields(new ResourceFieldSchema[] { column });
    return result;
}
Example #10
Source File: OrcStorage.java From spork with Apache License 2.0 | 6 votes |
@Override public List<String> getPredicateFields(String location, Job job) throws IOException { ResourceSchema schema = getSchema(location, job); List<String> predicateFields = new ArrayList<String>(); for (ResourceFieldSchema field : schema.getFields()) { switch(field.getType()) { case DataType.BOOLEAN: case DataType.INTEGER: case DataType.LONG: case DataType.FLOAT: case DataType.DOUBLE: case DataType.DATETIME: case DataType.CHARARRAY: case DataType.BIGINTEGER: case DataType.BIGDECIMAL: predicateFields.add(field.getName()); break; default: // Skip DataType.BYTEARRAY, DataType.TUPLE, DataType.MAP and DataType.BAG break; } } return predicateFields; }
Example #11
Source File: TestResourceSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * Test that ResourceSchema is correctly created given a
 * pig.Schema and vice versa.
 *
 * <p>A flat two-field Pig schema is wrapped in a ResourceSchema, each field's
 * name and type is compared with the inputs, and the schema is converted back
 * and compared with the original.
 */
@Test
public void testResourceFlatSchemaCreation()
        throws ExecException, SchemaMergeException, FrontendException {
    String[] aliases = {"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema origSchema = TypeCheckingTestUtil.genFlatSchema(aliases, types);

    ResourceSchema rsSchema = new ResourceSchema(origSchema);
    assertEquals("num fields", aliases.length, rsSchema.getFields().length);

    ResourceSchema.ResourceFieldSchema[] fields = rsSchema.getFields();
    for (int i = 0; i < fields.length; i++) {
        // Expected value first, so a failure message reports the right sides.
        assertEquals(aliases[i], fields[i].getName());
        assertEquals(types[i], fields[i].getType());
    }

    Schema genSchema = Schema.getPigSchema(rsSchema);
    assertTrue("generated schema equals original",
            Schema.equals(genSchema, origSchema, true, false));
}
Example #12
Source File: JsonLoader.java From spork with Apache License 2.0 | 6 votes |
@SuppressWarnings("unchecked") public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { this.reader = reader; // Get the schema string from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema == null) { throw new IOException("Could not find schema in UDF context"); } // Parse the schema from the string stored in the properties object. schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); jsonFactory = new JsonFactory(); }
Example #13
Source File: AvroStorage.java From spork with Apache License 2.0 | 6 votes |
/**
 * Translates the given resource schema into an Avro schema and records it as
 * the output schema.
 *
 * <p>When {@code schemaName} is null or empty, the name {@code "pig_output"}
 * is passed to the conversion instead.
 *
 * @param rs resource schema to translate; must not be null
 * @throws IOException if {@code rs} is null or the conversion yields null
 */
@Override
public final void checkSchema(final ResourceSchema rs) throws IOException {
    if (rs == null) {
        throw new IOException("checkSchema: called with null ResourceSchema");
    }

    final boolean unnamed = schemaName == null || schemaName.length() == 0;
    final String recordName = unnamed ? "pig_output" : schemaName;

    final Schema converted = AvroStorageSchemaConversionUtilities.resourceSchemaToAvroSchema(
            rs,
            recordName,
            schemaNameSpace,
            Maps.<String, List<Schema>> newHashMap(),
            doubleColonsToDoubleUnderscores);
    if (converted == null) {
        throw new IOException("checkSchema: could not translate ResourceSchema to Avro Schema");
    }

    setOutputAvroSchema(converted);
}
Example #14
Source File: TestResourceSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * Test one-level Pig Schema: multiple fields for a bag.
 *
 * <p>Wraps a flat two-field schema directly in a BAG field, then expects the
 * round trip through ResourceSchema to fail with error code 2218.
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() throws FrontendException {
    String[] aliases = {"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema level0 = TypeCheckingTestUtil.genFlatSchema(aliases, types);
    Schema.FieldSchema fld0 = new Schema.FieldSchema("f0", level0, DataType.BAG);
    Schema level1 = new Schema(fld0);
    try {
        Schema.getPigSchema(new ResourceSchema(level1));
        Assert.fail();
    } catch (FrontendException e) {
        // assertEquals reports the actual code on failure, unlike assertTrue(a == b).
        assertEquals(2218, e.getErrorCode());
    }
}
Example #15
Source File: JsonStorage.java From spork with Apache License 2.0 | 6 votes |
@Override public void prepareToWrite(RecordWriter writer) throws IOException { // Store the record writer reference so we can use it when it's time // to write tuples this.writer = writer; // Get the schema string from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema == null) { throw new IOException("Could not find schema in UDF context"); } // Parse the schema from the string stored in the properties object. schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); // Build a Json factory jsonFactory = new JsonFactory(); }
Example #16
Source File: PigTuple.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
/**
 * Replaces {@code schemaField} with a fresh TUPLE-typed field that carries
 * the given schema.
 *
 * @param schema schema to attach to the tuple field
 * @throws EsHadoopIllegalStateException if attaching the schema raises an
 *         {@link IOException}; the original exception is kept as the cause
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    // Assigned before configuration, so the member is replaced even if
    // attaching the schema fails.
    schemaField = tupleField;
    tupleField.setType(DataType.TUPLE);
    try {
        tupleField.setSchema(schema);
    } catch (IOException ex) {
        String message = String.format("Cannot use schema [%s]", schema);
        throw new EsHadoopIllegalStateException(message, ex);
    }
}
Example #17
Source File: Utils.java From spork with Apache License 2.0 | 5 votes |
/**
 * Prepends a CHARARRAY field for the 'input source tag/path' to the schema.
 * Per the original note, this is called only when PigStorage is invoked with
 * the '-tagFile' or '-tagPath' option and a schema file is present.
 *
 * <p>The fields are replaced on the passed-in schema via {@code setFields},
 * and the result of that call is returned.
 *
 * @param schema    schema whose field list receives the extra leading field
 * @param fieldName name of the new leading field
 * @return the value returned by {@code schema.setFields}
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] existing = schema.getFields();
    ResourceFieldSchema tagField =
            new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));

    ResourceFieldSchema[] tagged = new ResourceFieldSchema[existing.length + 1];
    tagged[0] = tagField;
    // Shift the existing fields one slot to the right.
    System.arraycopy(existing, 0, tagged, 1, existing.length);

    return schema.setFields(tagged);
}
Example #18
Source File: ReadToEndLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Delegates to the wrapped load function when it implements
 * {@code LoadMetadata}.
 *
 * @param location passed through to the wrapped loader
 * @param job      passed through to the wrapped loader
 * @return the wrapped loader's schema, or {@code null} when the wrapped
 *         loader is not a {@code LoadMetadata}
 * @throws IOException propagated from the wrapped loader
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    if (!(wrappedLoadFunc instanceof LoadMetadata)) {
        return null;
    }
    LoadMetadata metadataSource = (LoadMetadata) wrappedLoadFunc;
    return metadataSource.getSchema(location, job);
}
Example #19
Source File: TestStore.java From spork with Apache License 2.0 | 5 votes |
@Override public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException { FileSystem fs = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fs.listStatus(new Path(location), Util.getSuccessMarkerPathFilter()); // verify that output is available prior to storeSchema call Path resultPath = null; if (outputFiles != null && outputFiles.length > 0 && outputFiles[0].getPath().getName().startsWith("part-")) { resultPath = outputFiles[0].getPath(); } if (resultPath == null) { FileStatus[] listing = fs.listStatus(new Path(location)); for (FileStatus fstat : listing) { System.err.println("Output File:" + fstat.getPath()); } // not creating the marker file below fails the test throw new IOException("" + resultPath + " not available in storeSchema"); } // create a file to test that this method got called - if it gets called // multiple times, the create will throw an Exception fs.create( new Path(location + "_storeSchema_test"), false); }
Example #20
Source File: HBaseStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Requires the configured caster to be a {@code LoadStoreCaster}, then keeps
 * the schema and stores its serialized form in the UDF properties under
 * {@code contextSignature + "_schema"}.
 *
 * @param s schema to remember and serialize
 * @throws IOException if the caster is not a {@code LoadStoreCaster}, or if
 *         serialization fails
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    boolean canStore = caster_ instanceof LoadStoreCaster;
    if (!canStore) {
        LOG.error("Caster must implement LoadStoreCaster for writing to HBase.");
        throw new IOException("Bad Caster " + caster_.getClass());
    }

    schema_ = s;
    String propertyKey = contextSignature + "_schema";
    getUDFProperties().setProperty(propertyKey, ObjectSerializer.serialize(schema_));
}
Example #21
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a TUPLE field schema whose inner schema is an INTEGER field followed
 * by a CHARARRAY field, both unnamed.
 *
 * @return the tuple field schema
 * @throws IOException propagated from the schema setters
 */
ResourceFieldSchema getTupleFieldSchema() throws IOException {
    ResourceFieldSchema textField = new ResourceFieldSchema();
    textField.setType(DataType.CHARARRAY);

    ResourceFieldSchema numberField = new ResourceFieldSchema();
    numberField.setType(DataType.INTEGER);

    ResourceFieldSchema[] members = { numberField, textField };
    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(members);

    // Schema is attached before the type is set, as in the original ordering.
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(innerSchema);
    tupleField.setType(DataType.TUPLE);
    return tupleField;
}
Example #22
Source File: DBStorage.java From spork with Apache License 2.0 | 5 votes |
@Override public void checkSchema(ResourceSchema s) throws IOException { // We won't really check the schema here, we'll store it in our // UDFContext properties object so we have it when we need it on the // backend UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); p.setProperty(SCHEMA_SIGNATURE, s.toString()); }
Example #23
Source File: PigStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Writes the schema through a {@code JsonMetadata} writer when
 * {@code isSchemaOn} is set; otherwise does nothing.
 *
 * @param schema   schema to store
 * @param location passed through to the metadata writer
 * @param job      passed through to the metadata writer
 * @throws IOException propagated from the metadata writer
 */
@Override
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    if (!isSchemaOn) {
        return;
    }

    byte newline = '\n';
    JsonMetadata jsonWriter = new JsonMetadata();
    jsonWriter.setFieldDel(fieldDel);
    jsonWriter.setRecordDel(newline);
    jsonWriter.storeSchema(schema, location, job);
}
Example #24
Source File: FixedWidthLoader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Creates a loader from a column spec, a header option and a schema string.
 *
 * <p>All whitespace is stripped from the schema string before parsing. Fields
 * of type map, tuple or bag are rejected. Having more columns than schema
 * fields only produces a warning; having more schema fields than columns is
 * an error.
 *
 * @param columnSpec    column specification handed to {@code parseColumnSpec}
 * @param skipHeaderStr enables header skipping when it equals
 *                      {@code "SKIP_HEADER"}, ignoring case
 * @param schemaStr     schema in string form
 * @throws IllegalArgumentException if the schema cannot be parsed (the parser
 *         exception is kept as the cause), contains an object-typed field, or
 *         declares more fields than there are columns
 */
public FixedWidthLoader(String columnSpec, String skipHeaderStr, String schemaStr) {
    try {
        columns = parseColumnSpec(columnSpec);
        schemaStr = schemaStr.replaceAll("[\\s\\r\\n]", "");
        schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
        fields = schema.getFields();

        for (int i = 0; i < fields.length; i++) {
            byte fieldType = fields[i].getType();
            if (fieldType == DataType.MAP || fieldType == DataType.TUPLE || fieldType == DataType.BAG) {
                throw new IllegalArgumentException(
                    "Field \"" + fields[i].getName() + "\" is an object type (map, tuple, or bag). "
                    + "Object types are not supported by FixedWidthLoader."
                );
            }
        }

        if (fields.length < columns.size()) {
            warn("More columns specified in column spec than fields specified in schema. Only loading fields specified in schema.",
                 PigWarning.UDF_WARNING_2);
        } else if (fields.length > columns.size()) {
            throw new IllegalArgumentException("More fields specified in schema than columns specified in column spec.");
        }
    } catch (ParserException e) {
        // Chain the parser exception so its stack trace is not lost.
        throw new IllegalArgumentException("Invalid schema format: " + e.getMessage(), e);
    }

    if (skipHeaderStr.equalsIgnoreCase("SKIP_HEADER")) {
        skipHeader = true;
    }
}
Example #25
Source File: JsonStorage.java From spork with Apache License 2.0 | 5 votes |
/**
 * Retypes every top-level field of type NULL to BYTEARRAY, in place.
 *
 * @param s schema whose fields are adjusted
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s) {
    ResourceFieldSchema[] topLevel = s.getFields();
    for (int i = 0; i < topLevel.length; i++) {
        ResourceFieldSchema field = topLevel[i];
        if (field.getType() == DataType.NULL) {
            field.setType(DataType.BYTEARRAY);
        }
    }
    return s;
}
Example #26
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Parses the text {@code [key1#0.1]} against a {@code map[double]} field
 * schema and checks that the single entry has key {@code key1} and a
 * {@code Double} value that prints as {@code 0.1}.
 */
@Test
public void testMapDoubleValueType() throws Exception {
    String mapText = "[key1#0.1]";
    Schema mapSchema = Utils.getSchemaFromString("m:map[double]");
    ResourceFieldSchema mapField = new ResourceSchema(mapSchema).getFields()[0];

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(mapText.getBytes(), mapField);

    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof Double);
    assertEquals("0.1", String.valueOf(parsedValue));
}
Example #27
Source File: TestTextDataParser.java From spork with Apache License 2.0 | 5 votes |
/**
 * Parses the text {@code [key1#value1]} against a {@code map[chararray]}
 * field schema and checks that the single entry has key {@code key1} and a
 * {@code String} value equal to {@code value1}.
 */
@Test
public void testMapStringValueType() throws Exception {
    String mapText = "[key1#value1]";
    Schema mapSchema = Utils.getSchemaFromString("m:map[chararray]");
    ResourceFieldSchema mapField = new ResourceSchema(mapSchema).getFields()[0];

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(mapText.getBytes(), mapField);

    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof String);
    assertEquals("value1", String.valueOf(parsedValue));
}
Example #28
Source File: JsonStorage.java From spork with Apache License 2.0 | 5 votes |
@Override public void checkSchema(ResourceSchema s) throws IOException { // We won't really check the schema here, we'll store it in our // UDFContext properties object so we have it when we need it on the // backend UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); p.setProperty(SCHEMA_SIGNATURE, fixSchema(s).toString()); }
Example #29
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG field schema whose inner schema consists of the single field
 * returned by {@code getSmallTupleFieldSchema()}.
 *
 * @return the bag field schema
 * @throws IOException propagated from the schema helpers and setters
 */
public static ResourceFieldSchema getSmallTupDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagContents = { getSmallTupleFieldSchema() };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(bagContents);

    // Schema is attached before the type is set, as in the original ordering.
    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setSchema(innerSchema);
    bagField.setType(DataType.BAG);
    return bagField;
}
Example #30
Source File: GenRandomData.java From spork with Apache License 2.0 | 5 votes |
/**
 * Builds a BAG field schema whose inner schema consists of the single field
 * returned by {@code getSmallBagTextTupleFieldSchema()}.
 *
 * @return the outer bag field schema
 * @throws IOException propagated from the schema helpers and setters
 */
public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagContents = { getSmallBagTextTupleFieldSchema() };

    ResourceSchema innerSchema = new ResourceSchema();
    innerSchema.setFields(bagContents);

    // Schema is attached before the type is set, as in the original ordering.
    ResourceFieldSchema outerBag = new ResourceFieldSchema();
    outerBag.setSchema(innerSchema);
    outerBag.setType(DataType.BAG);
    return outerBag;
}